diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index d69168b01ee..1df8cea4ad5 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -3,6 +3,9 @@ name: BackportPR env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index ae1862e327f..284d94b5c5a 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -3,6 +3,9 @@ name: MasterCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: @@ -892,6 +895,48 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -975,6 +1020,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d97b9975c3c..cf9f025585d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3,6 +3,9 @@ name: PullRequestCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy pull_request: @@ -952,6 +955,47 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1034,6 +1078,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -5182,3 +5227,39 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +##################################### SQL TEST ############################################### +############################################################################################## + SQLTest: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqltest + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLTest + REPO_COPY=${{runner.temp}}/sqltest/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: SQLTest + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqltest.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 21284815583..e5527177aa5 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -3,6 +3,9 @@ name: ReleaseBranchCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: diff --git a/.gitmodules b/.gitmodules index 30085fb8dd4..c3592372b7e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -331,6 +331,10 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libarchive"] + path = contrib/libarchive + url = https://github.com/libarchive/libarchive.git + ignore = dirty [submodule "contrib/libfiu"] path = contrib/libfiu url = https://github.com/ClickHouse/libfiu.git @@ -343,3 +347,15 @@ [submodule "contrib/incbin"] path = contrib/incbin url = https://github.com/graphitemaster/incbin.git +[submodule "contrib/usearch"] + path = contrib/usearch + url = https://github.com/unum-cloud/usearch.git +[submodule "contrib/SimSIMD"] + path = contrib/SimSIMD + url = https://github.com/ashvardanian/SimSIMD.git +[submodule "contrib/FP16"] + path = contrib/FP16 + url = https://github.com/Maratyszcza/FP16.git +[submodule "contrib/robin-map"] + path = contrib/robin-map + url = https://github.com/Tessil/robin-map.git diff --git a/CHANGELOG.md b/CHANGELOG.md index bf26708ebb0..ce57affb161 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,6 @@ * Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). #### Performance Improvement -* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). * Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). * Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). @@ -114,6 +113,7 @@ * Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). #### Experimental Feature +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal. * Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. * (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). diff --git a/CMakeLists.txt b/CMakeLists.txt index 157f8c9fac0..55bcf5fbf3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -208,9 +208,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS "Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)" ${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT}) -if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set(USE_DEBUG_HELPERS ON) -endif() option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) diff --git a/README.md b/README.md index f0a7dbe2408..4ff9b9caaa1 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,8 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 -* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 -* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 -* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 +* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8 * [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index 0d1f8ae0a40..e3208f16a75 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -3,6 +3,7 @@ #include #include + template concept is_enum = std::is_enum_v; namespace detail diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 4c6d97b4444..0b43be38149 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -7,8 +7,6 @@ #include #include -#include - #define JSON_MAX_DEPTH 100 diff --git a/base/base/StringRef.h b/base/base/StringRef.h index f428b7c747f..9a97b2ea5cc 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include #include @@ -28,6 +30,11 @@ #define CRC_INT __crc32cd #endif +#if defined(__aarch64__) && defined(__ARM_NEON) + #include + #pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + /** * The std::string_view-like container to avoid creating strings to find substrings in the hash table. @@ -73,14 +80,14 @@ using StringRefs = std::vector; * For more information, see hash_map_string_2.cpp */ -inline bool compareSSE2(const char * p1, const char * p2) +inline bool compare8(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(p1)), _mm_loadu_si128(reinterpret_cast(p2)))); } -inline bool compareSSE2x4(const char * p1, const char * p2) +inline bool compare64(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8( _mm_and_si128( @@ -100,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2) _mm_loadu_si128(reinterpret_cast(p2) + 3))))); } -inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) +#elif defined(__aarch64__) && defined(__ARM_NEON) + +inline bool compare8(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vceqq_u8( + vld1q_u8(reinterpret_cast(p1)), vld1q_u8(reinterpret_cast(p2)))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +inline bool compare64(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vandq_u8( + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast(p1)), vld1q_u8(reinterpret_cast(p2))), + vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 16)), vld1q_u8(reinterpret_cast(p2 + 16)))), + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 32)), vld1q_u8(reinterpret_cast(p2 + 32))), + vceqq_u8(vld1q_u8(reinterpret_cast(p1 + 48)), vld1q_u8(reinterpret_cast(p2 + 48)))))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +#endif + +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + +inline bool memequalWide(const char * p1, const char * p2, size_t size) { /** The order of branches and the trick with overlapping comparisons * are the same as in memcpy implementation. @@ -137,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) while (size >= 64) { - if (compareSSE2x4(p1, p2)) + if (compare64(p1, p2)) { p1 += 64; p2 += 64; @@ -149,17 +179,16 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) switch (size / 16) { - case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]]; - case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]]; - case 1: if (!compareSSE2(p1, p2)) return false; + case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]]; + case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]]; + case 1: if (!compare8(p1, p2)) return false; } - return compareSSE2(p1 + size - 16, p2 + size - 16); + return compare8(p1 + size - 16, p2 + size - 16); } #endif - inline bool operator== (StringRef lhs, StringRef rhs) { if (lhs.size != rhs.size) @@ -168,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if defined(__SSE2__) - return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + return memequalWide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); #endif @@ -274,6 +303,8 @@ struct CRC32Hash if (size == 0) return 0; + chassert(pos); + if (size < 8) { return static_cast(hashLessThan8(x.data, x.size)); diff --git a/base/base/defines.h b/base/base/defines.h index 6abf8155b95..ee29ecf6118 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -115,8 +115,15 @@ /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) + // clang-format off + #include + namespace DB + { + void abortOnFailedAssertion(const String & description); + } #define chassert(x) static_cast(x) ? void(0) : ::DB::abortOnFailedAssertion(#x) #define UNREACHABLE() abort() + // clang-format off #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while diff --git a/base/base/simd.h b/base/base/simd.h new file mode 100644 index 00000000000..3283c40971c --- /dev/null +++ b/base/base/simd.h @@ -0,0 +1,14 @@ +#pragma once + +#if defined(__aarch64__) && defined(__ARM_NEON) + +# include +# pragma clang diagnostic ignored "-Wreserved-identifier" + +/// Returns a 64 bit mask of nibbles (4 bits for each byte). +inline uint64_t getNibbleMask(uint8x16_t res) +{ + return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0); +} + +#endif diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 411841e6d9f..fc4e9e551ca 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -12,7 +12,6 @@ #include #include -#include #include // NOLINTBEGIN(*) @@ -22,6 +21,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; diff --git a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp index 4caf097c28a..1c5555f8cf3 100644 --- a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp +++ b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp @@ -19,7 +19,6 @@ #include "Poco/UTF16Encoding.h" #include "Poco/Buffer.h" #include "Poco/Exception.h" -#include using Poco::Buffer; diff --git a/base/poco/Data/include/Poco/Data/TypeHandler.h b/base/poco/Data/include/Poco/Data/TypeHandler.h index 34f88e986f7..e7633de7018 100644 --- a/base/poco/Data/include/Poco/Data/TypeHandler.h +++ b/base/poco/Data/include/Poco/Data/TypeHandler.h @@ -97,7 +97,7 @@ namespace Data /// /// static void extract(std::size_t pos, Person& obj, const Person& defVal, AbstractExtractor::Ptr pExt) /// { - /// // defVal is the default person we should use if we encunter NULL entries, so we take the individual fields + /// // defVal is the default person we should use if we encounter NULL entries, so we take the individual fields /// // as defaults. You can do more complex checking, ie return defVal if only one single entry of the fields is null etc... /// poco_assert_dbg (!pExt.isNull()); /// std::string lastName; diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index eba8109253d..30654504e0b 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri, bool disable_url_encoding = false); + explicit URI(const std::string & uri, bool enable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -362,7 +362,7 @@ private: std::string _query; std::string _fragment; - bool _disable_url_encoding = false; + bool _enable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/Task.cpp b/base/poco/Foundation/src/Task.cpp index a850ae37eff..4303d50d6e8 100644 --- a/base/poco/Foundation/src/Task.cpp +++ b/base/poco/Foundation/src/Task.cpp @@ -16,7 +16,6 @@ #include "Poco/TaskManager.h" #include "Poco/Exception.h" -#include #include diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 3354c69d188..41e331bb080 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri, bool decode_and_encode_path): - _port(0), _disable_url_encoding(decode_and_encode_path) +URI::URI(const std::string& uri, bool enable_url_encoding): + _port(0), _enable_url_encoding(enable_url_encoding) { parse(uri); } @@ -108,7 +108,7 @@ URI::URI(const URI& uri): _path(uri._path), _query(uri._query), _fragment(uri._fragment), - _disable_url_encoding(uri._disable_url_encoding) + _enable_url_encoding(uri._enable_url_encoding) { } @@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _path(baseURI._path), _query(baseURI._query), _fragment(baseURI._fragment), - _disable_url_encoding(baseURI._disable_url_encoding) + _enable_url_encoding(baseURI._enable_url_encoding) { resolve(relativeURI); } @@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; - _disable_url_encoding = uri._disable_url_encoding; + _enable_url_encoding = uri._enable_url_encoding; } return *this; } @@ -184,7 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); - std::swap(_disable_url_encoding, uri._disable_url_encoding); + std::swap(_enable_url_encoding, uri._enable_url_encoding); } @@ -687,18 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa void URI::encodePath(std::string & encodedStr) const { - if (_disable_url_encoding) - encodedStr = _path; - else + if (_enable_url_encoding) encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; } void URI::decodePath(const std::string & encodedStr) { - if (_disable_url_encoding) - _path = encodedStr; - else + if (_enable_url_encoding) decode(encodedStr, _path); + else + _path = encodedStr; } bool URI::isWellKnownPort() const diff --git a/base/poco/JSON/src/Object.cpp b/base/poco/JSON/src/Object.cpp index 7fca65c5b01..b041f570934 100644 --- a/base/poco/JSON/src/Object.cpp +++ b/base/poco/JSON/src/Object.cpp @@ -14,7 +14,6 @@ #include "Poco/JSON/Object.h" #include -#include using Poco::Dynamic::Var; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index c5697b556d1..2712c0c452e 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -26,7 +26,6 @@ #include "Poco/CountingStream.h" #include "Poco/RegularExpression.h" #include -#include using Poco::NumberFormatter; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h index 65917ac9dd4..c19eecf5c73 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h @@ -146,7 +146,7 @@ namespace Net std::string cipherList; /// Specifies the supported ciphers in OpenSSL notation. - /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH". + /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH". std::string dhParamsFile; /// Specifies a file containing Diffie-Hellman parameters. @@ -172,7 +172,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. @@ -200,7 +200,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index 21a1ed685e5..e4037c87927 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -76,7 +76,7 @@ namespace Net /// none|relaxed|strict|once /// 1..9 /// true|false - /// ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH + /// ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH /// true|false /// /// KeyFileHandler diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp index ca220c40a33..d0bab902b89 100644 --- a/base/poco/NetSSL_OpenSSL/src/Context.cpp +++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp @@ -41,7 +41,7 @@ Context::Params::Params(): verificationMode(VERIFY_RELAXED), verificationDepth(9), loadDefaultCAs(false), - cipherList("ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH") + cipherList("ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH") { } diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 01c4a8b16e9..fbe7f96cea3 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -4,10 +4,19 @@ macro(add_glob cur_list) endmacro() macro(add_headers_and_sources prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) + add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c) endmacro() macro(add_headers_only prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) +endmacro() + +macro(extract_into_parent_list src_list dest_list) + list(REMOVE_ITEM ${src_list} ${ARGN}) + get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME) + foreach(file IN ITEMS ${ARGN}) + list(APPEND ${dest_list} ${__dir_name}/${file}) + endforeach() + set(${dest_list} "${${dest_list}}" PARENT_SCOPE) endmacro() diff --git a/cmake/linux/toolchain-s390x.cmake b/cmake/linux/toolchain-s390x.cmake index b85d4253b89..945eb9affa4 100644 --- a/cmake/linux/toolchain-s390x.cmake +++ b/cmake/linux/toolchain-s390x.cmake @@ -20,6 +20,9 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/cmake/target.cmake b/cmake/target.cmake index ffab08f1103..e4a2f060f1e 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -19,6 +19,19 @@ else () message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported") endif () +# Since we always use toolchain files to generate hermetic builds, cmake will +# always think it's a cross-compilation, See +# https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html +# +# This will slow down cmake configuration and compilation. For instance, LLVM +# will try to configure NATIVE LLVM targets with all tests enabled (You'll see +# Building native llvm-tblgen...). +# +# Here, we set it manually by checking the system name and processor. +if (${CMAKE_SYSTEM_NAME} STREQUAL ${CMAKE_HOST_SYSTEM_NAME} AND ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR}) + set (CMAKE_CROSSCOMPILING 0) +endif () + if (CMAKE_CROSSCOMPILING) if (OS_DARWIN) # FIXME: broken dependencies @@ -47,7 +60,7 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fdf6e60e58f..2557ebf78ae 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf) add_contrib (openldap-cmake openldap) add_contrib (grpc-cmake grpc) add_contrib (msgpack-c-cmake msgpack-c) +add_contrib (libarchive-cmake libarchive) add_contrib (corrosion-cmake corrosion) @@ -195,6 +196,17 @@ if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) endif() add_contrib (annoy-cmake annoy) + +option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES}) +if (ENABLE_USEARCH) + add_contrib (FP16-cmake FP16) + add_contrib (robin-map-cmake robin-map) + add_contrib (SimSIMD-cmake SimSIMD) + add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD +else () + message(STATUS "Not using USearch") +endif () + add_contrib (xxHash-cmake xxHash) add_contrib (libbcrypt-cmake libbcrypt) diff --git a/contrib/FP16 b/contrib/FP16 new file mode 160000 index 00000000000..0a92994d729 --- /dev/null +++ b/contrib/FP16 @@ -0,0 +1 @@ +Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91 diff --git a/contrib/FP16-cmake/CMakeLists.txt b/contrib/FP16-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/FP16-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/SimSIMD b/contrib/SimSIMD new file mode 160000 index 00000000000..de2cb75b9e9 --- /dev/null +++ b/contrib/SimSIMD @@ -0,0 +1 @@ +Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf diff --git a/contrib/SimSIMD-cmake/CMakeLists.txt b/contrib/SimSIMD-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/SimSIMD-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/base64 b/contrib/base64 index 9499e0c4945..8628e258090 160000 --- a/contrib/base64 +++ b/contrib/base64 @@ -1 +1 @@ -Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46 +Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11 diff --git a/contrib/boost b/contrib/boost index aec12eea7fc..063a9372b4a 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17 +Subproject commit 063a9372b4ae304e869a5c5724971d0501552731 diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 6f9dce0b042..7c2f2b27c47 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -19,6 +19,12 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM}) add_library (boost::filesystem ALIAS _boost_filesystem) target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) +if (OS_LINUX) + target_compile_definitions (_boost_filesystem PRIVATE + BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1 + ) +endif () + # headers-only add_library (_boost_headers_only INTERFACE) @@ -172,9 +178,9 @@ endif() # coroutine set (SRCS_COROUTINE - "${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp" - "${LIBRARY_DIR}/libs/coroutine/exceptions.cpp" - "${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/detail/coroutine_context.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/exceptions.cpp" + "${LIBRARY_DIR}/libs/coroutine/src/posix/stack_traits.cpp" ) add_library (_boost_coroutine ${SRCS_COROUTINE}) add_library (boost::coroutine ALIAS _boost_coroutine) diff --git a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h index 87363d16444..c98eb7e3585 100644 --- a/contrib/cityhash102/include/city.h +++ b/contrib/cityhash102/include/city.h @@ -73,8 +73,8 @@ struct uint128 uint128() = default; uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {} - friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); } - friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); } + + friend auto operator<=>(const uint128 &, const uint128 &) = default; }; inline uint64 Uint128Low64(const uint128 & x) { return x.low64; } diff --git a/contrib/curl b/contrib/curl index b0edf0b7dae..eb3b049df52 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d +Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 70d9c2816dc..733865d5101 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -8,125 +8,122 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS - "${LIBRARY_DIR}/lib/fopen.c" - "${LIBRARY_DIR}/lib/noproxy.c" - "${LIBRARY_DIR}/lib/idn.c" - "${LIBRARY_DIR}/lib/cfilters.c" - "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/altsvc.c" + "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/base64.c" + "${LIBRARY_DIR}/lib/bufq.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/cf-haproxy.c" "${LIBRARY_DIR}/lib/cf-https-connect.c" - "${LIBRARY_DIR}/lib/file.c" - "${LIBRARY_DIR}/lib/timeval.c" - "${LIBRARY_DIR}/lib/base64.c" - "${LIBRARY_DIR}/lib/hostip.c" - "${LIBRARY_DIR}/lib/progress.c" - "${LIBRARY_DIR}/lib/formdata.c" - "${LIBRARY_DIR}/lib/cookie.c" - "${LIBRARY_DIR}/lib/http.c" - "${LIBRARY_DIR}/lib/sendf.c" - "${LIBRARY_DIR}/lib/url.c" - "${LIBRARY_DIR}/lib/dict.c" - "${LIBRARY_DIR}/lib/if2ip.c" - "${LIBRARY_DIR}/lib/speedcheck.c" - "${LIBRARY_DIR}/lib/ldap.c" - "${LIBRARY_DIR}/lib/version.c" - "${LIBRARY_DIR}/lib/getenv.c" - "${LIBRARY_DIR}/lib/escape.c" - "${LIBRARY_DIR}/lib/mprintf.c" - "${LIBRARY_DIR}/lib/telnet.c" - "${LIBRARY_DIR}/lib/netrc.c" - "${LIBRARY_DIR}/lib/getinfo.c" - "${LIBRARY_DIR}/lib/transfer.c" - "${LIBRARY_DIR}/lib/strcase.c" - "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/curl_fnmatch.c" - "${LIBRARY_DIR}/lib/curl_log.c" - "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/krb5.c" - "${LIBRARY_DIR}/lib/memdebug.c" - "${LIBRARY_DIR}/lib/http_chunks.c" - "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/conncache.c" "${LIBRARY_DIR}/lib/connect.c" - "${LIBRARY_DIR}/lib/llist.c" - "${LIBRARY_DIR}/lib/hash.c" - "${LIBRARY_DIR}/lib/multi.c" "${LIBRARY_DIR}/lib/content_encoding.c" - "${LIBRARY_DIR}/lib/share.c" - "${LIBRARY_DIR}/lib/http_digest.c" - "${LIBRARY_DIR}/lib/md4.c" - "${LIBRARY_DIR}/lib/md5.c" - "${LIBRARY_DIR}/lib/http_negotiate.c" - "${LIBRARY_DIR}/lib/inet_pton.c" - "${LIBRARY_DIR}/lib/strtoofft.c" - "${LIBRARY_DIR}/lib/strerror.c" - "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/cookie.c" + "${LIBRARY_DIR}/lib/curl_addrinfo.c" + "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/curl_endian.c" + "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_get_line.c" + "${LIBRARY_DIR}/lib/curl_gethostname.c" + "${LIBRARY_DIR}/lib/curl_gssapi.c" + "${LIBRARY_DIR}/lib/curl_memrchr.c" + "${LIBRARY_DIR}/lib/curl_multibyte.c" + "${LIBRARY_DIR}/lib/curl_ntlm_core.c" + "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" + "${LIBRARY_DIR}/lib/curl_path.c" + "${LIBRARY_DIR}/lib/curl_range.c" + "${LIBRARY_DIR}/lib/curl_rtmp.c" + "${LIBRARY_DIR}/lib/curl_sasl.c" + "${LIBRARY_DIR}/lib/curl_sspi.c" + "${LIBRARY_DIR}/lib/curl_threads.c" + "${LIBRARY_DIR}/lib/curl_trc.c" + "${LIBRARY_DIR}/lib/dict.c" + "${LIBRARY_DIR}/lib/doh.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" + "${LIBRARY_DIR}/lib/easy.c" + "${LIBRARY_DIR}/lib/escape.c" + "${LIBRARY_DIR}/lib/file.c" + "${LIBRARY_DIR}/lib/fileinfo.c" + "${LIBRARY_DIR}/lib/fopen.c" + "${LIBRARY_DIR}/lib/formdata.c" + "${LIBRARY_DIR}/lib/getenv.c" + "${LIBRARY_DIR}/lib/getinfo.c" + "${LIBRARY_DIR}/lib/gopher.c" + "${LIBRARY_DIR}/lib/hash.c" + "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/hmac.c" "${LIBRARY_DIR}/lib/hostasyn.c" + "${LIBRARY_DIR}/lib/hostip.c" "${LIBRARY_DIR}/lib/hostip4.c" "${LIBRARY_DIR}/lib/hostip6.c" "${LIBRARY_DIR}/lib/hostsyn.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http.c" + "${LIBRARY_DIR}/lib/http2.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/http_chunks.c" + "${LIBRARY_DIR}/lib/http_digest.c" + "${LIBRARY_DIR}/lib/http_negotiate.c" + "${LIBRARY_DIR}/lib/http_ntlm.c" + "${LIBRARY_DIR}/lib/http_proxy.c" + "${LIBRARY_DIR}/lib/idn.c" + "${LIBRARY_DIR}/lib/if2ip.c" + "${LIBRARY_DIR}/lib/imap.c" "${LIBRARY_DIR}/lib/inet_ntop.c" + "${LIBRARY_DIR}/lib/inet_pton.c" + "${LIBRARY_DIR}/lib/krb5.c" + "${LIBRARY_DIR}/lib/ldap.c" + "${LIBRARY_DIR}/lib/llist.c" + "${LIBRARY_DIR}/lib/md4.c" + "${LIBRARY_DIR}/lib/md5.c" + "${LIBRARY_DIR}/lib/memdebug.c" + "${LIBRARY_DIR}/lib/mime.c" + "${LIBRARY_DIR}/lib/mprintf.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/multi.c" + "${LIBRARY_DIR}/lib/netrc.c" + "${LIBRARY_DIR}/lib/nonblock.c" + "${LIBRARY_DIR}/lib/noproxy.c" + "${LIBRARY_DIR}/lib/openldap.c" "${LIBRARY_DIR}/lib/parsedate.c" + "${LIBRARY_DIR}/lib/pingpong.c" + "${LIBRARY_DIR}/lib/pop3.c" + "${LIBRARY_DIR}/lib/progress.c" + "${LIBRARY_DIR}/lib/psl.c" + "${LIBRARY_DIR}/lib/rand.c" + "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" - "${LIBRARY_DIR}/lib/splay.c" - "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/sendf.c" + "${LIBRARY_DIR}/lib/setopt.c" + "${LIBRARY_DIR}/lib/sha256.c" + "${LIBRARY_DIR}/lib/share.c" + "${LIBRARY_DIR}/lib/slist.c" + "${LIBRARY_DIR}/lib/smb.c" + "${LIBRARY_DIR}/lib/smtp.c" + "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/socks.c" - "${LIBRARY_DIR}/lib/curl_addrinfo.c" "${LIBRARY_DIR}/lib/socks_gssapi.c" "${LIBRARY_DIR}/lib/socks_sspi.c" - "${LIBRARY_DIR}/lib/curl_sspi.c" - "${LIBRARY_DIR}/lib/slist.c" - "${LIBRARY_DIR}/lib/nonblock.c" - "${LIBRARY_DIR}/lib/curl_memrchr.c" - "${LIBRARY_DIR}/lib/imap.c" - "${LIBRARY_DIR}/lib/pop3.c" - "${LIBRARY_DIR}/lib/smtp.c" - "${LIBRARY_DIR}/lib/pingpong.c" - "${LIBRARY_DIR}/lib/rtsp.c" - "${LIBRARY_DIR}/lib/curl_threads.c" - "${LIBRARY_DIR}/lib/warnless.c" - "${LIBRARY_DIR}/lib/hmac.c" - "${LIBRARY_DIR}/lib/curl_rtmp.c" - "${LIBRARY_DIR}/lib/openldap.c" - "${LIBRARY_DIR}/lib/curl_gethostname.c" - "${LIBRARY_DIR}/lib/gopher.c" - "${LIBRARY_DIR}/lib/http_proxy.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" - "${LIBRARY_DIR}/lib/curl_gssapi.c" - "${LIBRARY_DIR}/lib/http_ntlm.c" - "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" - "${LIBRARY_DIR}/lib/curl_ntlm_core.c" - "${LIBRARY_DIR}/lib/curl_sasl.c" - "${LIBRARY_DIR}/lib/rand.c" - "${LIBRARY_DIR}/lib/curl_multibyte.c" - "${LIBRARY_DIR}/lib/conncache.c" - "${LIBRARY_DIR}/lib/cf-h1-proxy.c" - "${LIBRARY_DIR}/lib/http2.c" - "${LIBRARY_DIR}/lib/smb.c" - "${LIBRARY_DIR}/lib/curl_endian.c" - "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/speedcheck.c" + "${LIBRARY_DIR}/lib/splay.c" + "${LIBRARY_DIR}/lib/strcase.c" + "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/strerror.c" + "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/strtoofft.c" "${LIBRARY_DIR}/lib/system_win32.c" - "${LIBRARY_DIR}/lib/mime.c" - "${LIBRARY_DIR}/lib/sha256.c" - "${LIBRARY_DIR}/lib/setopt.c" - "${LIBRARY_DIR}/lib/curl_path.c" - "${LIBRARY_DIR}/lib/curl_range.c" - "${LIBRARY_DIR}/lib/psl.c" - "${LIBRARY_DIR}/lib/doh.c" - "${LIBRARY_DIR}/lib/urlapi.c" - "${LIBRARY_DIR}/lib/curl_get_line.c" - "${LIBRARY_DIR}/lib/altsvc.c" - "${LIBRARY_DIR}/lib/socketpair.c" - "${LIBRARY_DIR}/lib/bufref.c" - "${LIBRARY_DIR}/lib/bufq.c" - "${LIBRARY_DIR}/lib/dynbuf.c" - "${LIBRARY_DIR}/lib/dynhds.c" - "${LIBRARY_DIR}/lib/hsts.c" - "${LIBRARY_DIR}/lib/http_aws_sigv4.c" - "${LIBRARY_DIR}/lib/mqtt.c" - "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/telnet.c" "${LIBRARY_DIR}/lib/timediff.c" - "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/timeval.c" + "${LIBRARY_DIR}/lib/transfer.c" + "${LIBRARY_DIR}/lib/url.c" + "${LIBRARY_DIR}/lib/urlapi.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" "${LIBRARY_DIR}/lib/vauth/digest.c" @@ -138,23 +135,24 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/version.c" "${LIBRARY_DIR}/lib/vquic/vquic.c" - "${LIBRARY_DIR}/lib/vtls/openssl.c" + "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/vssh/libssh2.c" + "${LIBRARY_DIR}/lib/vtls/bearssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" - "${LIBRARY_DIR}/lib/vtls/vtls.c" - "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/wolfssl.c" + "${LIBRARY_DIR}/lib/vtls/hostcheck.c" + "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/mbedtls.c" + "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" "${LIBRARY_DIR}/lib/vtls/sectransp.c" - "${LIBRARY_DIR}/lib/vtls/gskit.c" - "${LIBRARY_DIR}/lib/vtls/mbedtls.c" - "${LIBRARY_DIR}/lib/vtls/bearssl.c" - "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/vtls.c" + "${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/x509asn1.c" - "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vssh/libssh2.c" - "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/warnless.c" ) add_library (_curl ${SRCS}) diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt index d4d6d648268..10f7d7bad64 100644 --- a/contrib/isa-l-cmake/CMakeLists.txt +++ b/contrib/isa-l-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES}) -if (ARCH_AARCH64) - # Disable ISA-L libray on aarch64. + +# ISA-L is only available for x86-64, so it shall be disabled for other platforms +if (NOT ARCH_AMD64) set (ENABLE_ISAL_LIBRARY OFF) endif () diff --git a/contrib/krb5 b/contrib/krb5 index b56ce6ba690..71b06c22760 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 +Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d diff --git a/contrib/libarchive b/contrib/libarchive new file mode 160000 index 00000000000..ee457961713 --- /dev/null +++ b/contrib/libarchive @@ -0,0 +1 @@ +Subproject commit ee45796171324519f0c0bfd012018dd099296336 diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt new file mode 100644 index 00000000000..cd5658b7086 --- /dev/null +++ b/contrib/libarchive-cmake/CMakeLists.txt @@ -0,0 +1,182 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive") + +set(SRCS + "${LIBRARY_DIR}/libarchive/archive_acl.c" + "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c" + "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c" + "${LIBRARY_DIR}/libarchive/archive_check_magic.c" + "${LIBRARY_DIR}/libarchive/archive_cmdline.c" + "${LIBRARY_DIR}/libarchive/archive_cryptor.c" + "${LIBRARY_DIR}/libarchive/archive_digest.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c" + "${LIBRARY_DIR}/libarchive/archive_entry.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c" + "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c" + "${LIBRARY_DIR}/libarchive/archive_entry_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c" + "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c" + "${LIBRARY_DIR}/libarchive/archive_getdate.c" + "${LIBRARY_DIR}/libarchive/archive_hmac.c" + "${LIBRARY_DIR}/libarchive/archive_match.c" + "${LIBRARY_DIR}/libarchive/archive_options.c" + "${LIBRARY_DIR}/libarchive/archive_pack_dev.c" + "${LIBRARY_DIR}/libarchive/archive_pathmatch.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd7.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd8.c" + "${LIBRARY_DIR}/libarchive/archive_random.c" + "${LIBRARY_DIR}/libarchive/archive_rb.c" + "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c" + "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c" + "${LIBRARY_DIR}/libarchive/archive_read.c" + "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract2.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_string.c" + "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c" + "${LIBRARY_DIR}/libarchive/archive_util.c" + "${LIBRARY_DIR}/libarchive/archive_version_details.c" + "${LIBRARY_DIR}/libarchive/archive_virtual.c" + "${LIBRARY_DIR}/libarchive/archive_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_write.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c" + "${LIBRARY_DIR}/libarchive/filter_fork_posix.c" + "${LIBRARY_DIR}/libarchive/filter_fork_windows.c" + "${LIBRARY_DIR}/libarchive/xxhash.c" +) + +add_library(_libarchive ${SRCS}) +target_include_directories(_libarchive PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + "${LIBRARY_DIR}/libarchive" +) + +target_compile_definitions(_libarchive PUBLIC + HAVE_CONFIG_H +) + +target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier") + +if (TARGET ch_contrib::xz) + target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::xz) +endif() + +if (TARGET ch_contrib::zlib) + target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zlib) +endif() + +if (TARGET ch_contrib::zstd) + target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zstd) +endif() + +if (TARGET ch_contrib::bzip2) + target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2) +endif() + +if (OS_LINUX) + target_compile_definitions( + _libarchive PUBLIC + MAJOR_IN_SYSMACROS=1 + HAVE_LINUX_FS_H=1 + HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 + HAVE_LINUX_TYPES_H=1 + HAVE_SYS_STATFS_H=1 + HAVE_FUTIMESAT=1 + HAVE_ICONV=1 + ) +endif() + +add_library(ch_contrib::libarchive ALIAS _libarchive) \ No newline at end of file diff --git a/contrib/libarchive-cmake/config.h b/contrib/libarchive-cmake/config.h new file mode 100644 index 00000000000..0b0cab47a52 --- /dev/null +++ b/contrib/libarchive-cmake/config.h @@ -0,0 +1,1391 @@ +/* config.h. Generated from build/cmake/config.h.in by cmake configure */ +#define __LIBARCHIVE_CONFIG_H_INCLUDED 1 + +/* + * Ensure we have C99-style int64_t, etc, all defined. + */ + +/* First, we need to know if the system has already defined them. */ +#define HAVE_INT16_T +#define HAVE_INT32_T +#define HAVE_INT64_T +#define HAVE_INTMAX_T + +#define HAVE_UINT8_T +#define HAVE_UINT16_T +#define HAVE_UINT32_T +#define HAVE_UINT64_T +#define HAVE_UINTMAX_T + +/* We might have the types we want under other spellings. */ +/* #undef HAVE___INT64 */ +/* #undef HAVE_U_INT64_T */ +/* #undef HAVE_UNSIGNED___INT64 */ + +/* The sizes of various standard integer types. */ +#define SIZEOF_SHORT 2 +#define SIZEOF_INT 4 +#define SIZEOF_LONG 8 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF_UNSIGNED_SHORT 2 +#define SIZEOF_UNSIGNED 4 +#define SIZEOF_UNSIGNED_LONG 8 +#define SIZEOF_UNSIGNED_LONG_LONG 8 + +/* + * If we lack int64_t, define it to the first of __int64, int, long, and long long + * that exists and is the right size. + */ +#if !defined(HAVE_INT64_T) && defined(HAVE___INT64) +typedef __int64 int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_INT == 8 +typedef int int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG == 8 +typedef long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG_LONG == 8 +typedef long long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) +#error No 64-bit integer type was found. +#endif + +/* + * Similarly for int32_t + */ +#if !defined(HAVE_INT32_T) && SIZEOF_INT == 4 +typedef int int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) && SIZEOF_LONG == 4 +typedef long int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) +#error No 32-bit integer type was found. +#endif + +/* + * Similarly for int16_t + */ +#if !defined(HAVE_INT16_T) && SIZEOF_INT == 2 +typedef int int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) && SIZEOF_SHORT == 2 +typedef short int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) +#error No 16-bit integer type was found. +#endif + +/* + * Similarly for uint64_t + */ +#if !defined(HAVE_UINT64_T) && defined(HAVE_UNSIGNED___INT64) +typedef unsigned __int64 uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED == 8 +typedef unsigned uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) +#error No 64-bit unsigned integer type was found. +#endif + + +/* + * Similarly for uint32_t + */ +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED == 4 +typedef unsigned uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) +#error No 32-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint16_t + */ +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED == 2 +typedef unsigned uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED_SHORT == 2 +typedef unsigned short uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) +#error No 16-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint8_t + */ +#if !defined(HAVE_UINT8_T) +typedef unsigned char uint8_t; +#define HAVE_UINT8_T +#endif + +#if !defined(HAVE_UINT8_T) +#error No 8-bit unsigned integer type was found. +#endif + +/* Define intmax_t and uintmax_t if they are not already defined. */ +#if !defined(HAVE_INTMAX_T) +typedef int64_t intmax_t; +#endif + +#if !defined(HAVE_UINTMAX_T) +typedef uint64_t uintmax_t; +#endif + +/* Define ZLIB_WINAPI if zlib was built on Visual Studio. */ +/* #undef ZLIB_WINAPI */ + +/* Darwin ACL support */ +/* #undef ARCHIVE_ACL_DARWIN */ + +/* FreeBSD ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD */ + +/* FreeBSD NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD_NFS4 */ + +/* Linux POSIX.1e ACL support via libacl */ +/* #undef ARCHIVE_ACL_LIBACL */ + +/* Linux NFSv4 ACL support via librichacl */ +/* #undef ARCHIVE_ACL_LIBRICHACL */ + +/* Solaris ACL support */ +/* #undef ARCHIVE_ACL_SUNOS */ + +/* Solaris NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_SUNOS_NFS4 */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBC */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBSYSTEM */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_MBEDTLS */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_NETTLE */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_OPENSSL */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_WIN */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_LIBC */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_NETTLE */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_MBEDTLS */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBC */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBSYSTEM */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_MBEDTLS */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_NETTLE */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_WIN */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC2 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC3 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBSYSTEM */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_MBEDTLS */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_NETTLE */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_OPENSSL */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_WIN */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC2 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC3 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBSYSTEM */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_MBEDTLS */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_NETTLE */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_OPENSSL */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_WIN */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC2 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC3 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBSYSTEM */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_MBEDTLS */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_NETTLE */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_OPENSSL */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_WIN */ + +/* AIX xattr support */ +/* #undef ARCHIVE_XATTR_AIX */ + +/* Darwin xattr support */ +/* #undef ARCHIVE_XATTR_DARWIN */ + +/* FreeBSD xattr support */ +/* #undef ARCHIVE_XATTR_FREEBSD */ + +/* Linux xattr support */ +/* #undef ARCHIVE_XATTR_LINUX */ + +/* Version number of bsdcpio */ +#define BSDCPIO_VERSION_STRING "3.7.0" + +/* Version number of bsdtar */ +#define BSDTAR_VERSION_STRING "3.7.0" + +/* Version number of bsdcat */ +#define BSDCAT_VERSION_STRING "3.7.0" + +/* Define to 1 if you have the `acl_create_entry' function. */ +/* #undef HAVE_ACL_CREATE_ENTRY */ + +/* Define to 1 if you have the `acl_get_fd_np' function. */ +/* #undef HAVE_ACL_GET_FD_NP */ + +/* Define to 1 if you have the `acl_get_link' function. */ +/* #undef HAVE_ACL_GET_LINK */ + +/* Define to 1 if you have the `acl_get_link_np' function. */ +/* #undef HAVE_ACL_GET_LINK_NP */ + +/* Define to 1 if you have the `acl_get_perm' function. */ +/* #undef HAVE_ACL_GET_PERM */ + +/* Define to 1 if you have the `acl_get_perm_np' function. */ +/* #undef HAVE_ACL_GET_PERM_NP */ + +/* Define to 1 if you have the `acl_init' function. */ +/* #undef HAVE_ACL_INIT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ACL_LIBACL_H */ + +/* Define to 1 if the system has the type `acl_permset_t'. */ +/* #undef HAVE_ACL_PERMSET_T */ + +/* Define to 1 if you have the `acl_set_fd' function. */ +/* #undef HAVE_ACL_SET_FD */ + +/* Define to 1 if you have the `acl_set_fd_np' function. */ +/* #undef HAVE_ACL_SET_FD_NP */ + +/* Define to 1 if you have the `acl_set_file' function. */ +/* #undef HAVE_ACL_SET_FILE */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ATTR_XATTR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BSDXML_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the `chflags' function. */ +/* #undef HAVE_CHFLAGS */ + +/* Define to 1 if you have the `chown' function. */ +#define HAVE_CHOWN 1 + +/* Define to 1 if you have the `chroot' function. */ +#define HAVE_CHROOT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_COPYFILE_H */ + +/* Define to 1 if you have the `ctime_r' function. */ +#define HAVE_CTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the `cygwin_conv_path' function. */ +/* #undef HAVE_CYGWIN_CONV_PATH */ + +/* Define to 1 if you have the declaration of `ACE_GETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACL */ + +/* Define to 1 if you have the declaration of `ACE_GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACLCNT */ + +/* Define to 1 if you have the declaration of `ACE_SETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_SETACL */ + +/* Define to 1 if you have the declaration of `ACL_SYNCHRONIZE', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_SYNCHRONIZE */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_EXTENDED', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_TYPE_EXTENDED */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_NFS4', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_TYPE_NFS4 */ + +/* Define to 1 if you have the declaration of `ACL_USER', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_USER */ + +/* Define to 1 if you have the declaration of `INT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MAX 1 + +/* Define to 1 if you have the declaration of `INT32_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MIN 1 + +/* Define to 1 if you have the declaration of `INT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MAX 1 + +/* Define to 1 if you have the declaration of `INT64_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MIN 1 + +/* Define to 1 if you have the declaration of `INTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `INTMAX_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MIN 1 + +/* Define to 1 if you have the declaration of `SETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SETACL */ + +/* Define to 1 if you have the declaration of `SIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SIZE_MAX 1 + +/* Define to 1 if you have the declaration of `SSIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SSIZE_MAX 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the declaration of `UINT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT32_MAX 1 + +/* Define to 1 if you have the declaration of `UINT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT64_MAX 1 + +/* Define to 1 if you have the declaration of `UINTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `XATTR_NOFOLLOW', and to 0 if + you don't. */ +/* #undef HAVE_DECL_XATTR_NOFOLLOW */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DIRECT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the `dirfd' function. */ +#define HAVE_DIRFD 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ +/* #undef HAVE_DOPRNT */ + +/* Define to 1 if nl_langinfo supports D_MD_ORDER */ +/* #undef HAVE_D_MD_ORDER */ + +/* A possible errno value for invalid file format errors */ +/* #undef HAVE_EFTYPE */ + +/* A possible errno value for invalid file format errors */ +#define HAVE_EILSEQ 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXPAT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXT2FS_EXT2_FS_H */ + +/* Define to 1 if you have the `extattr_get_file' function. */ +/* #undef HAVE_EXTATTR_GET_FILE */ + +/* Define to 1 if you have the `extattr_list_file' function. */ +/* #undef HAVE_EXTATTR_LIST_FILE */ + +/* Define to 1 if you have the `extattr_set_fd' function. */ +/* #undef HAVE_EXTATTR_SET_FD */ + +/* Define to 1 if you have the `extattr_set_file' function. */ +/* #undef HAVE_EXTATTR_SET_FILE */ + +/* Define to 1 if EXTATTR_NAMESPACE_USER is defined in sys/extattr.h. */ +/* #undef HAVE_DECL_EXTATTR_NAMESPACE_USER */ + +/* Define to 1 if you have the declaration of `GETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_GETACL */ + +/* Define to 1 if you have the declaration of `GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_GETACLCNT */ + +/* Define to 1 if you have the `fchdir' function. */ +#define HAVE_FCHDIR 1 + +/* Define to 1 if you have the `fchflags' function. */ +/* #undef HAVE_FCHFLAGS */ + +/* Define to 1 if you have the `fchmod' function. */ +#define HAVE_FCHMOD 1 + +/* Define to 1 if you have the `fchown' function. */ +#define HAVE_FCHOWN 1 + +/* Define to 1 if you have the `fcntl' function. */ +#define HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fdopendir' function. */ +#define HAVE_FDOPENDIR 1 + +/* Define to 1 if you have the `fgetea' function. */ +/* #undef HAVE_FGETEA */ + +/* Define to 1 if you have the `fgetxattr' function. */ +/* #undef HAVE_FGETXATTR */ + +/* Define to 1 if you have the `flistea' function. */ +/* #undef HAVE_FLISTEA */ + +/* Define to 1 if you have the `flistxattr' function. */ +#define HAVE_FLISTXATTR 1 + +/* Define to 1 if you have the `fnmatch' function. */ +#define HAVE_FNMATCH 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FNMATCH_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ +#define HAVE_FSEEKO 1 + +/* Define to 1 if you have the `fsetea' function. */ +/* #undef HAVE_FSETEA */ + +/* Define to 1 if you have the `fsetxattr' function. */ +/* #undef HAVE_FSETXATTR */ + +/* Define to 1 if you have the `fstat' function. */ +#define HAVE_FSTAT 1 + +/* Define to 1 if you have the `fstatat' function. */ +#define HAVE_FSTATAT 1 + +/* Define to 1 if you have the `fstatfs' function. */ +#define HAVE_FSTATFS 1 + +/* Define to 1 if you have the `fstatvfs' function. */ +#define HAVE_FSTATVFS 1 + +/* Define to 1 if you have the `ftruncate' function. */ +#define HAVE_FTRUNCATE 1 + +/* Define to 1 if you have the `futimens' function. */ +#define HAVE_FUTIMENS 1 + +/* Define to 1 if you have the `futimes' function. */ +#define HAVE_FUTIMES 1 + +/* Define to 1 if you have the `futimesat' function. */ +/* #undef HAVE_FUTIMESAT */ + +/* Define to 1 if you have the `getea' function. */ +/* #undef HAVE_GETEA */ + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getgrgid_r' function. */ +#define HAVE_GETGRGID_R 1 + +/* Define to 1 if you have the `getgrnam_r' function. */ +#define HAVE_GETGRNAM_R 1 + +/* Define to 1 if platform uses `optreset` to reset `getopt` */ +#define HAVE_GETOPT_OPTRESET 1 + +/* Define to 1 if you have the `getpid' function. */ +#define HAVE_GETPID 1 + +/* Define to 1 if you have the `getpwnam_r' function. */ +#define HAVE_GETPWNAM_R 1 + +/* Define to 1 if you have the `getpwuid_r' function. */ +#define HAVE_GETPWUID_R 1 + +/* Define to 1 if you have the `getvfsbyname' function. */ +/* #undef HAVE_GETVFSBYNAME */ + +/* Define to 1 if you have the `getxattr' function. */ +#define HAVE_GETXATTR 1 + +/* Define to 1 if you have the `gmtime_r' function. */ +#define HAVE_GMTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the `iconv' function. */ +/* #undef HAVE_ICONV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ICONV_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LANGINFO_H 1 + +/* Define to 1 if you have the `lchflags' function. */ +/* #undef HAVE_LCHFLAGS */ + +/* Define to 1 if you have the `lchmod' function. */ +#define HAVE_LCHMOD 1 + +/* Define to 1 if you have the `lchown' function. */ +#define HAVE_LCHOWN 1 + +/* Define to 1 if you have the `lgetea' function. */ +/* #undef HAVE_LGETEA */ + +/* Define to 1 if you have the `lgetxattr' function. */ +#define HAVE_LGETXATTR 1 + +/* Define to 1 if you have the `acl' library (-lacl). */ +/* #undef HAVE_LIBACL */ + +/* Define to 1 if you have the `attr' library (-lattr). */ +/* #undef HAVE_LIBATTR */ + +/* Define to 1 if you have the `bsdxml' library (-lbsdxml). */ +/* #undef HAVE_LIBBSDXML */ + +/* Define to 1 if you have the `bz2' library (-lbz2). */ +/* #undef HAVE_LIBBZ2 */ + +/* Define to 1 if you have the `b2' library (-lb2). */ +/* #undef HAVE_LIBB2 */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BLAKE2_H */ + +/* Define to 1 if you have the `charset' library (-lcharset). */ +/* #undef HAVE_LIBCHARSET */ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +/* #undef HAVE_LIBCRYPTO */ + +/* Define to 1 if you have the `expat' library (-lexpat). */ +/* #undef HAVE_LIBEXPAT */ + +/* Define to 1 if you have the `gcc' library (-lgcc). */ +/* #undef HAVE_LIBGCC */ + +/* Define to 1 if you have the `lz4' library (-llz4). */ +/* #undef HAVE_LIBLZ4 */ + +/* Define to 1 if you have the `lzma' library (-llzma). */ +/* #undef HAVE_LIBLZMA */ + +/* Define to 1 if you have the `lzmadec' library (-llzmadec). */ +/* #undef HAVE_LIBLZMADEC */ + +/* Define to 1 if you have the `lzo2' library (-llzo2). */ +/* #undef HAVE_LIBLZO2 */ + +/* Define to 1 if you have the `mbedcrypto' library (-lmbedcrypto). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `nettle' library (-lnettle). */ +/* #undef HAVE_LIBNETTLE */ + +/* Define to 1 if you have the `pcre' library (-lpcre). */ +/* #undef HAVE_LIBPCRE */ + +/* Define to 1 if you have the `pcreposix' library (-lpcreposix). */ +/* #undef HAVE_LIBPCREPOSIX */ + +/* Define to 1 if you have the `xml2' library (-lxml2). */ +#define HAVE_LIBXML2 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLREADER_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLWRITER_H */ + +/* Define to 1 if you have the `z' library (-lz). */ +/* #undef HAVE_LIBZ */ + +/* Define to 1 if you have the `zstd' library (-lzstd). */ +/* #undef HAVE_LIBZSTD */ + +/* Define to 1 if you have the `zstd' library (-lzstd) with compression + support. */ +/* #undef HAVE_LIBZSTD_COMPRESSOR */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `link' function. */ +#define HAVE_LINK 1 + +/* Define to 1 if you have the `linkat' function. */ +#define HAVE_LINKAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FIEMAP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_MAGIC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_TYPES_H */ + +/* Define to 1 if you have the `listea' function. */ +/* #undef HAVE_LISTEA */ + +/* Define to 1 if you have the `listxattr' function. */ +#define HAVE_LISTXATTR 1 + +/* Define to 1 if you have the `llistea' function. */ +/* #undef HAVE_LLISTEA */ + +/* Define to 1 if you have the `llistxattr' function. */ +#define HAVE_LLISTXATTR 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LOCALCHARSET_H */ + +/* Define to 1 if you have the `locale_charset' function. */ +/* #undef HAVE_LOCALE_CHARSET */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if you have the `localtime_r' function. */ +#define HAVE_LOCALTIME_R 1 + +/* Define to 1 if the system has the type `long long int'. */ +/* #undef HAVE_LONG_LONG_INT */ + +/* Define to 1 if you have the `lsetea' function. */ +/* #undef HAVE_LSETEA */ + +/* Define to 1 if you have the `lsetxattr' function. */ +#define HAVE_LSETXATTR 1 + +/* Define to 1 if you have the `lstat' function. */ +#define HAVE_LSTAT 1 + +/* Define to 1 if `lstat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_LSTAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the `lutimes' function. */ +#define HAVE_LUTIMES 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4HC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMADEC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */ +/* #undef HAVE_LZMA_STREAM_ENCODER_MT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZO1X_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZOCONF_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_MD_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_PKCS5_H */ + +/* Define to 1 if you have the `mbrtowc' function. */ +/* #undef HAVE_MBRTOWC */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MEMBERSHIP_H */ + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mkdir' function. */ +#define HAVE_MKDIR 1 + +/* Define to 1 if you have the `mkfifo' function. */ +#define HAVE_MKFIFO 1 + +/* Define to 1 if you have the `mknod' function. */ +#define HAVE_MKNOD 1 + +/* Define to 1 if you have the `mkstemp' function. */ +#define HAVE_MKSTEMP 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_HMAC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_MD5_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_PBKDF2_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_RIPEMD160_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_SHA_H */ + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the `openat' function. */ +#define HAVE_OPENAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_EVP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PATHS_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PCREPOSIX_H */ + +/* Define to 1 if you have the `pipe' function. */ +#define HAVE_PIPE 1 + +/* Define to 1 if you have the `PKCS5_PBKDF2_HMAC_SHA1' function. */ +/* #undef HAVE_PKCS5_PBKDF2_HMAC_SHA1 */ + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `posix_spawnp' function. */ +#define HAVE_POSIX_SPAWNP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PROCESS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `readdir_r' function. */ +#define HAVE_READDIR_R 1 + +/* Define to 1 if you have the `readlink' function. */ +#define HAVE_READLINK 1 + +/* Define to 1 if you have the `readlinkat' function. */ +#define HAVE_READLINKAT 1 + +/* Define to 1 if you have the `readpassphrase' function. */ +/* #undef HAVE_READPASSPHRASE */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_READPASSPHRASE_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_REGEX_H 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SPAWN_H 1 + +/* Define to 1 if you have the `statfs' function. */ +#define HAVE_STATFS 1 + +/* Define to 1 if you have the `statvfs' function. */ +#define HAVE_STATVFS 1 + +/* Define to 1 if `stat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_STAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the `strnlen' function. */ +#define HAVE_STRNLEN 1 + +/* Define to 1 if you have the `strdup' function. */ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the `strerror_r' function. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strrchr' function. */ +#define HAVE_STRRCHR 1 + +/* Define to 1 if `f_namemax' is a member of `struct statfs'. */ +/* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */ + +/* Define to 1 if `f_iosize' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_IOSIZE */ + +/* Define to 1 if `st_birthtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIME */ + +/* Define to 1 if `st_birthtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_blksize' is a member of `struct stat'. */ +#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 + +/* Define to 1 if `st_flags' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_FLAGS */ + +/* Define to 1 if `st_mtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_mtime_n' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_N */ + +/* Define to 1 if `st_mtime_usec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_USEC */ + +/* Define to 1 if `st_mtim.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC */ + +/* Define to 1 if `st_umtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_UMTIME */ + +/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */ +#define HAVE_STRUCT_TM_TM_GMTOFF 1 + +/* Define to 1 if `__tm_gmtoff' is a member of `struct tm'. */ +/* #undef HAVE_STRUCT_TM___TM_GMTOFF */ + +/* Define to 1 if you have `struct vfsconf'. */ +/* #undef HAVE_STRUCT_VFSCONF */ + +/* Define to 1 if you have `struct xvfsconf'. */ +/* #undef HAVE_STRUCT_XVFSCONF */ + +/* Define to 1 if you have the `symlink' function. */ +#define HAVE_SYMLINK 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_ACL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EA_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EXTATTR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MKDEV_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MOUNT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_POLL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_RICHACL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_STATFS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STATVFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSMACROS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UTSNAME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_VFS_H 1 + +/* Define to 1 if you have that is POSIX.1 compatible. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_XATTR_H */ + +/* Define to 1 if you have the `timegm' function. */ +#define HAVE_TIMEGM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the `tzset' function. */ +#define HAVE_TZSET 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unlinkat' function. */ +#define HAVE_UNLINKAT 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define HAVE_UNSETENV 1 + +/* Define to 1 if the system has the type `unsigned long long'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG */ + +/* Define to 1 if the system has the type `unsigned long long int'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG_INT */ + +/* Define to 1 if you have the `utime' function. */ +#define HAVE_UTIME 1 + +/* Define to 1 if you have the `utimensat' function. */ +#define HAVE_UTIMENSAT 1 + +/* Define to 1 if you have the `utimes' function. */ +#define HAVE_UTIMES 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UTIME_H 1 + +/* Define to 1 if you have the `vfork' function. */ +#define HAVE_VFORK 1 + +/* Define to 1 if you have the `vprintf' function. */ +#define HAVE_VPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the `wcrtomb' function. */ +#define HAVE_WCRTOMB 1 + +/* Define to 1 if you have the `wcscmp' function. */ +#define HAVE_WCSCMP 1 + +/* Define to 1 if you have the `wcscpy' function. */ +#define HAVE_WCSCPY 1 + +/* Define to 1 if you have the `wcslen' function. */ +#define HAVE_WCSLEN 1 + +/* Define to 1 if you have the `wctomb' function. */ +#define HAVE_WCTOMB 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCTYPE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINIOCTL_H */ + +/* Define to 1 if you have _CrtSetReportMode in */ +/* #undef HAVE__CrtSetReportMode */ + +/* Define to 1 if you have the `wmemcmp' function. */ +#define HAVE_WMEMCMP 1 + +/* Define to 1 if you have the `wmemcpy' function. */ +#define HAVE_WMEMCPY 1 + +/* Define to 1 if you have the `wmemmove' function. */ +#define HAVE_WMEMMOVE 1 + +/* Define to 1 if you have a working EXT2_IOC_GETFLAGS */ +/* #undef HAVE_WORKING_EXT2_IOC_GETFLAGS */ + +/* Define to 1 if you have a working FS_IOC_GETFLAGS */ +#define HAVE_WORKING_FS_IOC_GETFLAGS 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZSTD_H */ + +/* Define to 1 if you have the `ctime_s' function. */ +/* #undef HAVE_CTIME_S */ + +/* Define to 1 if you have the `_fseeki64' function. */ +/* #undef HAVE__FSEEKI64 */ + +/* Define to 1 if you have the `_get_timezone' function. */ +/* #undef HAVE__GET_TIMEZONE */ + +/* Define to 1 if you have the `gmtime_s' function. */ +/* #undef HAVE_GMTIME_S */ + +/* Define to 1 if you have the `localtime_s' function. */ +/* #undef HAVE_LOCALTIME_S */ + +/* Define to 1 if you have the `_mkgmtime' function. */ +/* #undef HAVE__MKGMTIME */ + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Version number of libarchive as a single integer */ +#define LIBARCHIVE_VERSION_NUMBER "3007000" + +/* Version number of libarchive */ +#define LIBARCHIVE_VERSION_STRING "3.7.0" + +/* Define to 1 if `lstat' dereferences a symlink specified with a trailing + slash. */ +/* #undef LSTAT_FOLLOWS_SLASHED_SYMLINK */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in . + */ +/* #undef MAJOR_IN_MKDEV */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in + . */ +/* #undef MAJOR_IN_SYSMACROS */ + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +/* #undef NO_MINUS_C_MINUS_O */ + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 4 + +/* Define to 1 if strerror_r returns char *. */ +/* #undef STRERROR_R_CHAR_P */ + +/* Define to 1 if you can safely include both and . */ +/* #undef TIME_WITH_SYS_TIME */ + +/* + * Some platform requires a macro to use extension functions. + */ +#define SAFE_TO_DEFINE_EXTENSIONS 1 +#ifdef SAFE_TO_DEFINE_EXTENSIONS +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +#endif /* SAFE_TO_DEFINE_EXTENSIONS */ + +/* Version number of package */ +#define VERSION "3.7.0" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +/* #undef _LARGEFILE_SOURCE */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* Define to control Windows SDK version */ +#ifndef NTDDI_VERSION +/* #undef NTDDI_VERSION */ +#endif // NTDDI_VERSION + +#ifndef _WIN32_WINNT +/* #undef _WIN32_WINNT */ +#endif // _WIN32_WINNT + +#ifndef WINVER +/* #undef WINVER */ +#endif // WINVER + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define to `unsigned long' if does not define. */ +/* #undef id_t */ + +/* Define to `int' if does not define. */ +/* #undef mode_t */ + +/* Define to `long long' if does not define. */ +/* #undef off_t */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to `int' if does not define. */ +/* #undef ssize_t */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +/* Define to `int' if does not define. */ +/* #undef intptr_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef uintptr_t */ diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h index bc00e5a286b..9e83d11cb7c 100644 --- a/contrib/libmetrohash/src/platform.h +++ b/contrib/libmetrohash/src/platform.h @@ -17,7 +17,8 @@ #ifndef METROHASH_PLATFORM_H #define METROHASH_PLATFORM_H -#include +#include +#include #include // rotate right idiom recognized by most compilers @@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr) // so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction // when hardware supports it, so using memcpy() is efficient too. memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr) { uint32_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr) { uint16_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } diff --git a/contrib/llvm-project b/contrib/llvm-project index d857c707fcc..e7b8befca85 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9 +Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0 diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index fe6cffd33e2..00992f4f792 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,18 +1,16 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") +if (APPLE OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() -option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) +option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() endif() -# TODO: Enable compilation on AArch64 - set (LLVM_VERSION "15.0.0bundled") set (LLVM_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include" @@ -58,18 +56,30 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) -# if (ARCH_AMD64) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -# elseif (ARCH_AARCH64) -# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -# endif () +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) +endif () + +message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") -set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64" # Omit unnecessary stuff (just the options which are ON by default) set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "") @@ -99,15 +109,12 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") -# Since we always use toolchain files to generate hermatic builds, cmake will -# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM -# targets with all tests enabled, which will slow down cmake configuration and -# compilation (You'll see Building native llvm-tblgen...). Let's disable the -# cross compiling indicator for now. -# -# TODO We should let cmake know whether it's indeed a cross compilation in the -# first place. -set (CMAKE_CROSSCOMPILING 0) +message (STATUS "LLVM CMAKE CROSS COMPILING ${CMAKE_CROSSCOMPILING}") +if (CMAKE_CROSSCOMPILING) + set (LLVM_HOST_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE INTERNAL "") + message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}") +endif() + add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/orc b/contrib/orc index 568d1d60c25..a20d1d9d7ad 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1 +Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6 diff --git a/contrib/robin-map b/contrib/robin-map new file mode 160000 index 00000000000..851a59e0e30 --- /dev/null +++ b/contrib/robin-map @@ -0,0 +1 @@ +Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d diff --git a/contrib/robin-map-cmake/CMakeLists.txt b/contrib/robin-map-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f82ad705dcc --- /dev/null +++ b/contrib/robin-map-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/snappy b/contrib/snappy index fb057edfed8..6ebb5b1ab88 160000 --- a/contrib/snappy +++ b/contrib/snappy @@ -1 +1 @@ -Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf +Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a diff --git a/contrib/usearch b/contrib/usearch new file mode 160000 index 00000000000..387b78b28b1 --- /dev/null +++ b/contrib/usearch @@ -0,0 +1 @@ +Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30 diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt new file mode 100644 index 00000000000..29fbe57106c --- /dev/null +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -0,0 +1,17 @@ +set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") +set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include") + +set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16") +set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map") +set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map") + +add_library(_usearch INTERFACE) + +target_include_directories(_usearch SYSTEM INTERFACE + ${FP16_PROJECT_DIR}/include + ${ROBIN_MAP_PROJECT_DIR}/include + ${SIMSIMD_PROJECT_DIR}/include + ${USEARCH_SOURCE_DIR}) + +add_library(ch_contrib::usearch ALIAS _usearch) +target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH) diff --git a/docker/README.md b/docker/README.md index ec52ddd143e..c1bb3b49f00 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,5 +1,5 @@ ## ClickHouse Dockerfiles -This directory contain Dockerfiles for `clickhouse-client` and `clickhouse-server`. They are updated in each release. +This directory contain Dockerfiles for `clickhouse-server`. They are updated in each release. -Also there is bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. +Also, there is a bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile deleted file mode 100644 index 1c185daec75..00000000000 --- a/docker/client/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM ubuntu:18.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=22.1.1.* - -RUN apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && apt-get update \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-client=$version \ - clickhouse-common-static=$version \ - locales \ - tzdata \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf \ - && apt-get clean - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -ENTRYPOINT ["/usr/bin/clickhouse-client"] diff --git a/docker/client/README.md b/docker/client/README.md deleted file mode 100644 index bbcc7d60794..00000000000 --- a/docker/client/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# ClickHouse Client Docker Image - -For more information see [ClickHouse Server Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/). - -## License - -View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image. diff --git a/docker/images.json b/docker/images.json index e8fc329a640..d895e2da2f0 100644 --- a/docker/images.json +++ b/docker/images.json @@ -125,6 +125,7 @@ "docker/test/keeper-jepsen", "docker/test/server-jepsen", "docker/test/sqllogic", + "docker/test/sqltest", "docker/test/stateless" ] }, @@ -155,11 +156,18 @@ }, "docker/docs/builder": { "name": "clickhouse/docs-builder", - "dependent": [ - ] + "dependent": [] }, "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] + }, + "docker/test/integration/nginx_dav": { + "name": "clickhouse/nginx-dav", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index c9800e4e66d..c7206550bd8 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/packager/README.md b/docker/packager/README.md index a78feb8d7fc..3a91f9a63f0 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 99e748c41d4..940daad9c61 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -58,33 +58,6 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ rustup target add aarch64-apple-darwin && \ rustup target add powerpc64le-unknown-linux-gnu -# Create vendor cache for cargo. -# -# Note, that the config.toml for the root is used, you will not be able to -# install any other crates, except those which had been vendored (since if -# there is "replace-with" for some source, then cargo will not look to other -# remotes except this). -# -# Notes for the command itself: -# - --chown is required to preserve the rights -# - unstable-options for -C -# - chmod is required to fix the permissions, since builds are running from a different user -# - copy of the Cargo.lock is required for proper dependencies versions -# - cargo vendor --sync is requried to overcome [1] bug. -# -# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23 -COPY --chown=root:root /rust /rust/packages -RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \ - cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \ - cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \ - rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \ - sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \ - cat $CARGO_HOME/config.toml && \ - mv /rust/packages/vendor /rust/vendor && \ - chmod -R o=r+X /rust/vendor && \ - ls -R -l /rust/packages && \ - rm -r /rust/packages - # NOTE: Seems like gcc-11 is too new for ubuntu20 repository # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ @@ -107,6 +80,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Download toolchain and SDK for Darwin RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +# Download and install mold 2.0 for s390x build +RUN curl -Lo /tmp/mold.tar.gz "https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz" \ + && mkdir /tmp/mold \ + && tar -xzf /tmp/mold.tar.gz -C /tmp/mold \ + && cp -r /tmp/mold/mold*/* /usr \ + && rm -rf /tmp/mold \ + && rm /tmp/mold.tar.gz + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.20.0 diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust deleted file mode 120000 index 742dc49e9ac..00000000000 --- a/docker/packager/binary/rust +++ /dev/null @@ -1 +0,0 @@ -../../../rust \ No newline at end of file diff --git a/docker/packager/packager b/docker/packager/packager index 399e12e6680..1d0ccae8a69 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool: output = subprocess.check_output( f"docker images -q {image_name} 2> /dev/null", shell=True ) - return output != "" + return output != b"" except subprocess.CalledProcessError: return False @@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None: ) -def pre_build(repo_path: Path, env_variables: List[str]): +def pre_build(repo_path: Path, env_variables: List[str]) -> None: if "WITH_PERFORMANCE=1" in env_variables: current_branch = subprocess.check_output( "git branch --show-current", shell=True, encoding="utf-8" @@ -80,9 +80,12 @@ def run_docker_image_with_env( output_dir: Path, env_variables: List[str], ch_root: Path, + cargo_cache_dir: Path, ccache_dir: Optional[Path], -): +) -> None: output_dir.mkdir(parents=True, exist_ok=True) + cargo_cache_dir.mkdir(parents=True, exist_ok=True) + env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -104,7 +107,7 @@ def run_docker_image_with_env( cmd = ( f"docker run --network=host --user={user} --rm {ccache_mount}" f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} " - f"{interactive} {image_name}" + f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}" ) logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) @@ -112,12 +115,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: - return build_type == "" and package_type == "deb" and sanitizer == "" +def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: + return not debug_build and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type: str, + debug_build: bool, compiler: str, sanitizer: str, package_type: str, @@ -129,9 +132,10 @@ def parse_env_variables( version: str, official: bool, additional_pkgs: bool, + with_profiler: bool, with_coverage: bool, with_binaries: str, -): +) -> List[str]: DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" @@ -139,6 +143,7 @@ def parse_env_variables( FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" RISCV_SUFFIX = "-riscv64" + S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] @@ -152,6 +157,7 @@ def parse_env_variables( is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_riscv = compiler.endswith(RISCV_SUFFIX) + is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) @@ -213,6 +219,11 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) + elif is_cross_s390x: + cc = compiler[: -len(S390X_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake" + ) elif is_amd64_compat: cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") @@ -240,7 +251,7 @@ def parse_env_variables( build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(build_type, package_type, sanitizer): + if is_release_build(debug_build, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: @@ -255,8 +266,8 @@ def parse_env_variables( if sanitizer: result.append(f"SANITIZER={sanitizer}") - if build_type: - result.append(f"BUILD_TYPE={build_type.capitalize()}") + if debug_build: + result.append("BUILD_TYPE=Debug") else: result.append("BUILD_TYPE=None") @@ -322,6 +333,9 @@ def parse_env_variables( # utils are not included into clickhouse-bundle, so build everything build_target = "all" + if with_profiler: + cmake_flags.append("-DENABLE_BUILD_PROFILING=1") + if with_coverage: cmake_flags.append("-DWITH_COVERAGE=1") @@ -361,7 +375,7 @@ def parse_args() -> argparse.Namespace: help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--build-type", choices=("debug", ""), default="") + parser.add_argument("--debug-build", action="store_true") parser.add_argument( "--compiler", @@ -373,6 +387,7 @@ def parse_args() -> argparse.Namespace: "clang-16-aarch64-v80compat", "clang-16-ppc64le", "clang-16-riscv64", + "clang-16-s390x", "clang-16-amd64-compat", "clang-16-freebsd", ), @@ -412,10 +427,18 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, the build fails on errors writing cache to S3", ) + parser.add_argument( + "--cargo-cache-dir", + default=Path(os.getenv("CARGO_HOME", "") or Path.home() / ".cargo") + / "registry", + type=dir_name, + help="a directory to preserve the rust cargo crates", + ) parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") parser.add_argument("--official", action="store_true") parser.add_argument("--additional-pkgs", action="store_true") + parser.add_argument("--with-profiler", action="store_true") parser.add_argument("--with-coverage", action="store_true") parser.add_argument( "--with-binaries", choices=("programs", "tests", ""), default="" @@ -451,7 +474,7 @@ def parse_args() -> argparse.Namespace: return args -def main(): +def main() -> None: logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") args = parse_args() @@ -467,7 +490,7 @@ def main(): build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, + args.debug_build, args.compiler, args.sanitizer, args.package_type, @@ -479,6 +502,7 @@ def main(): args.version, args.official, args.additional_pkgs, + args.with_profiler, args.with_coverage, args.with_binaries, ) @@ -490,6 +514,7 @@ def main(): args.output_dir, env_prepared, ch_root, + args.cargo_cache_dir, args.ccache_dir, ) logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index f558338b23c..a3a1f912e90 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 156de034a7f..4936cfeccb0 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.7.1.2470" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 6ba44662af9..b55baa0e0fc 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -19,20 +19,23 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. -RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone +# This script is used to setup realtime export of server logs from the CI into external ClickHouse cluster: +COPY setup_export_logs.sh / + CMD sleep 1 diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh new file mode 100755 index 00000000000..9a48c0fcafc --- /dev/null +++ b/docker/test/base/setup_export_logs.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# This script sets up export of system log tables to a remote server. +# Remote tables are created if not exist, and augmented with extra columns, +# and their names will contain a hash of the table structure, +# which allows exporting tables from servers of different versions. + +# Pre-configured destination cluster, where to export the data +CLUSTER=${CLUSTER:=system_logs_export} + +EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "} +EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"} +EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "} + +CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""} + +# Create all configured system logs: +clickhouse-client --query "SYSTEM FLUSH LOGS" + +# It's doesn't make sense to try creating tables if SYNC fails +echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 $CONNECTION_PARAMETERS || exit 0 + +# For each system log table: +clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table +do + # Calculate hash of its structure: + hash=$(clickhouse-client --query " + SELECT sipHash64(groupArray((name, type))) + FROM (SELECT name, type FROM system.columns + WHERE database = 'system' AND table = '$table' + ORDER BY position) + ") + + # Create the destination table with adapted name and structure: + statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + s/^\($/('"$EXTRA_COLUMNS"'/; + s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; + s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; + /^TTL /d + ') + + echo "Creating destination table ${table}_${hash}" >&2 + + echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue + + echo "Creating table system.${table}_sender" >&2 + + # Create Distributed table and materialized view to watch on the original table: + clickhouse-client --query " + CREATE TABLE system.${table}_sender + ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash}) + SETTINGS flush_on_detach=0 + EMPTY AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " + + echo "Creating materialized view system.${table}_watcher" >&2 + + clickhouse-client --query " + CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " +done diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index da4baa8c687..ad24e662a6c 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 60e6199aaa4..477bbdb6919 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -148,6 +148,7 @@ function clone_submodules contrib/liburing contrib/libfiu contrib/incbin + contrib/yaml-cpp ) git submodule sync @@ -170,6 +171,7 @@ function run_cmake "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" "-DENABLE_LIBURING=1" + "-DENABLE_YAML_CPP=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index aa71074c02a..0bc0fb06633 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 5cda0831a84..05cc92ee040 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -122,6 +122,23 @@ EOL $PWD EOL + + # Setup a cluster for logs export to ClickHouse Cloud + # Note: these variables are provided to the Docker run command by the Python script in tests/ci + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + echo " +remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + port: 9440 + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > db/config.d/system_logs_export.yaml + fi } function filter_exists_and_template @@ -223,7 +240,22 @@ quit done clickhouse-client --query "select 1" # This checks that the server is responding kill -0 $server_pid # This checks that it is our server that is started and not some other one - echo Server started and responded + echo 'Server started and responded' + + # Initialize export of system logs to ClickHouse Cloud + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + # TODO: Check if the password will appear in the logs. + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + /setup_export_logs.sh + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' + fi # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. # SC2046: Quote this to prevent word splitting. Actually I need word splitting. diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile index 9614473c69b..e9c928b1fe7 100644 --- a/docker/test/install/deb/Dockerfile +++ b/docker/test/install/deb/Dockerfile @@ -12,6 +12,7 @@ ENV \ # install systemd packages RUN apt-get update && \ apt-get install -y --no-install-recommends \ + sudo \ systemd \ && \ apt-get clean && \ diff --git a/docker/test/integration/nginx_dav/Dockerfile b/docker/test/integration/nginx_dav/Dockerfile new file mode 100644 index 00000000000..42c1244f6dc --- /dev/null +++ b/docker/test/integration/nginx_dav/Dockerfile @@ -0,0 +1,6 @@ +FROM nginx:alpine-slim + +COPY default.conf /etc/nginx/conf.d/ + +RUN mkdir /usr/share/nginx/files/ \ + && chown nginx: /usr/share/nginx/files/ -R diff --git a/docker/test/integration/nginx_dav/default.conf b/docker/test/integration/nginx_dav/default.conf new file mode 100644 index 00000000000..466d0584a2d --- /dev/null +++ b/docker/test/integration/nginx_dav/default.conf @@ -0,0 +1,25 @@ +server { + listen 80; + + #root /usr/share/nginx/test.com; + index index.html index.htm; + + server_name test.com localhost; + + location / { + expires max; + root /usr/share/nginx/files; + client_max_body_size 20m; + client_body_temp_path /usr/share/nginx/tmp; + dav_methods PUT; # Allowed methods, only PUT is necessary + + create_full_put_path on; # nginx automatically creates nested directories + dav_access user:rw group:r all:r; # access permissions for files + + limit_except GET { + allow all; + } + } + + error_page 405 =200 $uri; +} diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 8e95d94b6dc..f67c45f7114 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -95,6 +95,7 @@ RUN python3 -m pip install --no-cache-dir \ pytest-timeout \ pytest-xdist \ pytz \ + pyyaml==5.3.1 \ redis \ requests-kerberos \ tzlocal==2.1 \ @@ -129,7 +130,7 @@ COPY misc/ /misc/ # Same options as in test/base/Dockerfile # (in case you need to override them in tests) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml index b329d4e0a46..e4736e04846 100644 --- a/docker/test/integration/runner/compose/docker_compose_coredns.yml +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -2,7 +2,7 @@ version: "2.3" services: coredns: - image: coredns/coredns:latest + image: coredns/coredns:1.9.3 # :latest broke this test restart: always volumes: - ${COREDNS_CONFIG_DIR}/example.com:/example.com diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index f83eb93fea7..1cae54ad9e1 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -12,3 +12,5 @@ services: - type: ${HDFS_FS:-tmpfs} source: ${HDFS_LOGS:-} target: /usr/local/hadoop/logs + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 30d1b0bed3f..4ae3de3cbc7 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -31,6 +31,8 @@ services: - kafka_zookeeper security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' schema-registry: image: confluentinc/cp-schema-registry:5.2.0 diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml index 8524823ed87..91010c4aa83 100644 --- a/docker/test/integration/runner/compose/docker_compose_keeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -20,6 +20,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir1:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -53,6 +56,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir2:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -86,6 +92,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir3:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index 1160192696d..e955a14eb3d 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -20,6 +20,8 @@ services: depends_on: - hdfskerberos entrypoint: /etc/bootstrap.sh -d + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' hdfskerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} @@ -29,3 +31,5 @@ services: - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random expose: [88, 749] + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 86e920ff573..49d4c1db90f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -48,6 +48,8 @@ services: - kafka_kerberos security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' kafka_kerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} diff --git a/docker/test/integration/runner/compose/docker_compose_meili.yml b/docker/test/integration/runner/compose/docker_compose_meili.yml index c734c43b4c6..c1fad4aca87 100644 --- a/docker/test/integration/runner/compose/docker_compose_meili.yml +++ b/docker/test/integration/runner/compose/docker_compose_meili.yml @@ -1,16 +1,15 @@ version: '2.3' services: meili1: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700} meili_secure: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700} environment: MEILI_MASTER_KEY: "password" - diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 3eaf891ff8e..f2979566296 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -14,7 +14,7 @@ services: MINIO_ACCESS_KEY: minio MINIO_SECRET_KEY: minio123 MINIO_PROMETHEUS_AUTH_TYPE: public - command: server --address :9001 --certs-dir /certs /data1-1 + command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1 depends_on: - proxy1 - proxy2 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 6b98a372bd0..103fe2769e9 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-1.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -21,4 +21,4 @@ services: volumes: - type: ${MYSQL_LOGS_FS:-tmpfs} source: ${MYSQL_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml index d5fb5a53aaf..9c9c7430cec 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml @@ -9,9 +9,9 @@ services: DATADIR: /mysql/ expose: - ${MYSQL8_PORT:-3306} - command: --server_id=100 --log-bin='mysql-bin-1.log' - --default_authentication_plugin='mysql_native_password' - --default-time-zone='+3:00' --gtid-mode="ON" + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -20,4 +20,4 @@ services: volumes: - type: ${MYSQL8_LOGS_FS:-tmpfs} source: ${MYSQL8_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 8e145a3b408..73f9e39f0d6 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-2.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-2.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/2_error.log @@ -31,10 +31,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-3.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-3.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/3_error.log @@ -53,10 +53,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-4.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-4.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/4_error.log @@ -65,4 +65,4 @@ services: volumes: - type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs} source: ${MYSQL_CLUSTER_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml index d0fb9fc1ff4..38d2a6d84c8 100644 --- a/docker/test/integration/runner/compose/docker_compose_nginx.yml +++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml @@ -5,7 +5,7 @@ services: # Files will be put into /usr/share/nginx/files. nginx: - image: kssenii/nginx-test:1.1 + image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest} restart: always ports: - 80:80 diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index 1fb6b7a1410..2ef7eb17395 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -12,9 +12,9 @@ services: timeout: 5s retries: 5 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local environment: POSTGRES_HOST_AUTH_METHOD: "trust" POSTGRES_PASSWORD: mysecretpassword diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml index 7a1c32e0023..b5dbae423b2 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml @@ -12,7 +12,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -37,7 +37,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -61,7 +61,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 3c4ff522b36..b05aef76faf 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} -export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} -export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} +export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} +export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} +export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest} export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest} +export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest} export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest} -export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} -export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} cd /ClickHouse/tests/integration exec "$@" diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index ab9f1f8a2e3..d31663f9071 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,18 +1,7 @@ # docker build -t clickhouse/performance-comparison . -# Using ubuntu:22.04 over 20.04 as all other images, since: -# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend -# b) anyway for perf tests it should not be important (backward compatiblity -# with older ubuntu had been checked lots of times in various tests) -FROM ubuntu:22.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ @@ -56,10 +45,9 @@ COPY * / # node #0 should be less stable because of system interruptions. We bind # randomly to node 1 or 0 to gather some statistics on that. We have to bind # both servers and the tmpfs on which the database is stored. How to do it -# through Yandex Sandbox API is unclear, but by default tmpfs uses +# is unclear, but by default tmpfs uses # 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. We could also try to -# remount it with proper options in Sandbox task. +# writes to it, so just bind the downloader script as well. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 798d2a40b12..4b1b5c13b9b 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -90,7 +90,7 @@ function configure set +m wait_for_server $LEFT_SERVER_PORT $left_pid - echo Server for setup started + echo "Server for setup started" clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: @@ -156,9 +156,9 @@ function restart wait_for_server $RIGHT_SERVER_PORT $right_pid echo right ok - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options" - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options" # Check again that both servers we started are running -- this is important @@ -352,14 +352,12 @@ function get_profiles wait clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: & - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & @@ -665,9 +663,8 @@ create view partial_query_times as select * from -- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. -- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select toDecimal64(time_median, 3) time, - toDecimal64(time_stddev / time_median, 3) relative_time_stddev, + as select round(time_median, 3) time, + round(time_stddev / time_median, 3) relative_time_stddev, test, query_index, query_display_name from partial_query_times join query_display_names using (test, query_index) @@ -739,28 +736,26 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') ; create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(diff, 3), toDecimal64(stat_threshold, 3), + round(left, 3), round(right, 3), times_change_str, + round(diff, 3), round(stat_threshold, 3), changed_fail, test, query_index, query_display_name from queries where changed_show order by abs(diff) desc; create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as select - toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3), - toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name + round(left, 3), round(right, 3), round(diff, 3), + round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name from queries where unstable_show order by stat_threshold desc; @@ -789,11 +784,10 @@ create view total_speedup as ; create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as with (times_speedup >= 1 - ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x' - : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x') + ? '-' || toString(round(times_speedup, 3)) || 'x' + : '+' || toString(round(1 / times_speedup, 3)) || 'x') as times_speedup_str select test, times_speedup_str, queries, bad, changed, unstable -- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all @@ -817,11 +811,10 @@ create view total_client_time_per_query as select * 'test text, query_index int, client float, server float'); create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select client, server, toDecimal64(client/server, 3) p, + as select client, server, round(client/server, 3) p, test, query_display_name from total_client_time_per_query left join query_display_names using (test, query_index) - where p > toDecimal64(1.02, 3) order by p desc; + where p > round(1.02, 3) order by p desc; create table wall_clock_time_per_test engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); @@ -899,15 +892,14 @@ create view test_times_view_total as ; create table test_times_report engine File(TSV, 'report/test-times.tsv') - settings output_format_decimal_trailing_zeros = 1 as select test, - toDecimal64(real, 3), - toDecimal64(total_client_time, 3), + round(real, 3), + round(total_client_time, 3), queries, - toDecimal64(query_max, 3), - toDecimal64(avg_real_per_query, 3), - toDecimal64(query_min, 3), + round(query_max, 3), + round(avg_real_per_query, 3), + round(query_min, 3), runs from ( select * from test_times_view @@ -919,21 +911,20 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') -- report for all queries page, only main metric create table all_tests_report engine File(TSV, 'report/all-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select changed_fail, unstable_fail, - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(isFinite(diff) ? diff : 0, 3), - toDecimal64(isFinite(stat_threshold) ? stat_threshold : 0, 3), + round(left, 3), round(right, 3), times_change_str, + round(isFinite(diff) ? diff : 0, 3), + round(isFinite(stat_threshold) ? stat_threshold : 0, 3), test, query_index, query_display_name from queries order by test, query_index; @@ -1044,27 +1035,6 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes, order by count() desc ; -create table metric_devation engine File(TSVWithNamesAndTypes, - 'report/metric-deviation.$version.tsv') - settings output_format_decimal_trailing_zeros = 1 - -- first goes the key used to split the file with grep - as select test, query_index, query_display_name, - toDecimal64(d, 3) d, q, metric - from ( - select - test, query_index, - (q[3] - q[1])/q[2] d, - quantilesExact(0, 0.5, 1)(value) q, metric - from (select * from unstable_run_metrics - union all select * from unstable_run_traces - union all select * from unstable_run_metrics_2) mm - group by test, query_index, metric - having isFinite(d) and d > 0.5 and q[3] > 5 - ) metrics - left join query_display_names using (test, query_index) - order by test, query_index, d desc - ; - create table stacks engine File(TSV, 'report/stacks.$version.tsv') as select -- first goes the key used to split the file with grep @@ -1173,9 +1143,8 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as -- Show metrics that have changed create table changes engine File(TSV, 'metrics/changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as select metric, left, right, - toDecimal64(diff, 3), toDecimal64(times_diff, 3) + round(diff, 3), round(times_diff, 3) from ( select metric, median(left) as left, median(right) as right, (right - left) / left diff, @@ -1226,7 +1195,6 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') '$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha, '${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name, '$(sed -n 's/.*/\1/p' report.html)' :: LowCardinality(String) AS check_status, - -- TODO toDateTime() can't parse output of 'date', so no time for now. (($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms, fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time, test_name :: LowCardinality(String) AS test_name , diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 39c29bb61ca..292665c4f68 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,31 +19,6 @@ - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - 1000000000 10 diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 093834943a3..cb591f1a184 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -3,7 +3,7 @@ 1 1 - 1 + 0 0 0 + 0 60 diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index aee11030068..cb243b655c6 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -31,8 +31,6 @@ function download # Test all of them. declare -a urls_to_try=( "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 74571777be0..fb5e6bd2a7a 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -130,7 +130,7 @@ then git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt fi -# Set python output encoding so that we can print queries with Russian letters. +# Set python output encoding so that we can print queries with non-ASCII letters. export PYTHONIOENCODING=utf-8 # By default, use the main comparison script from the tested package, so that we @@ -151,11 +151,7 @@ export PATH export REF_PR export REF_SHA -# Try to collect some core dumps. I've seen two patterns in Sandbox: -# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c -# Not sure what this script does (puts them to sandbox resources, logs some messages?), -# and it's not accessible from inside docker anyway. -# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory. +# Try to collect some core dumps. # At least we remove the ulimit and then try to pack some common file names into output. ulimit -c unlimited cat /proc/sys/kernel/core_pattern diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 7a4e6386d0d..d23a9ac61c1 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -369,6 +369,7 @@ for query_index in queries_to_run: "max_execution_time": args.prewarm_max_query_seconds, "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, "memory_profiler_step": "4Mi", }, ) @@ -503,6 +504,7 @@ for query_index in queries_to_run: settings={ "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, }, ) print( diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index 444252837a3..db828741b0d 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -1,4 +1,5 @@ #!/bin/bash + set -exu trap "exit" INT TERM @@ -96,5 +97,4 @@ rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compressed (FIXME: remove once only github actions will be left) -rm /var/log/clickhouse-server/clickhouse-server.log mv /var/log/clickhouse-server/stderr.log /test_output/ ||: diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile new file mode 100644 index 00000000000..437677f4fd1 --- /dev/null +++ b/docker/test/sqltest/Dockerfile @@ -0,0 +1,30 @@ +# docker build -t clickhouse/sqltest . +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +RUN apt-get update --yes \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + wget \ + git \ + python3 \ + python3-dev \ + python3-pip \ + sudo \ + && apt-get clean + +RUN pip3 install \ + pyyaml \ + clickhouse-driver + +ARG sqltest_repo="https://github.com/elliotchance/sqltest/" + +RUN git clone ${sqltest_repo} + +ENV TZ=UTC +ENV MAX_RUN_TIME=900 +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY run.sh / +COPY test.py / +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/sqltest/run.sh b/docker/test/sqltest/run.sh new file mode 100755 index 00000000000..cba1c1dab1f --- /dev/null +++ b/docker/test/sqltest/run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# shellcheck disable=SC2015 + +set -x +set -e +set -u +set -o pipefail + +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} + +function wget_with_retry +{ + for _ in 1 2 3 4; do + if wget -nv -nd -c "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + +wget_with_retry "$BINARY_URL_TO_DOWNLOAD" +chmod +x clickhouse +./clickhouse install --noninteractive + +echo " +users: + default: + access_management: 1" > /etc/clickhouse-server/users.d/access_management.yaml + +clickhouse start + +# Wait for start +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break ||: + sleep 1 +done + +# Run the test +pushd sqltest/standards/2016/ +/test.py +mv report.html test.log /workspace +popd + +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.log +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.err.log + +mv /var/log/clickhouse-server/clickhouse-server.log.zst /var/log/clickhouse-server/clickhouse-server.err.log.zst /workspace diff --git a/docker/test/sqltest/test.py b/docker/test/sqltest/test.py new file mode 100755 index 00000000000..5807ca79b02 --- /dev/null +++ b/docker/test/sqltest/test.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 + +import os +import yaml +import html +import random +import string +from clickhouse_driver import Client + + +client = Client(host="localhost", port=9000) +settings = { + "default_table_engine": "Memory", + "union_default_mode": "DISTINCT", + "calculate_text_stack_trace": 0, +} + +database_name = "sqltest_" + "".join( + random.choice(string.ascii_lowercase) for _ in range(10) +) + +client.execute(f"DROP DATABASE IF EXISTS {database_name}", settings=settings) +client.execute(f"CREATE DATABASE {database_name}", settings=settings) + +client = Client(host="localhost", port=9000, database=database_name) + +summary = {"success": 0, "total": 0, "results": {}} + +log_file = open("test.log", "w") +report_html_file = open("report.html", "w") + +with open("features.yml", "r") as file: + yaml_content = yaml.safe_load(file) + + for category in yaml_content: + log_file.write(category.capitalize() + " features:\n") + summary["results"][category] = {"success": 0, "total": 0, "results": {}} + + for test in yaml_content[category]: + log_file.write(test + ": " + yaml_content[category][test] + "\n") + summary["results"][category]["results"][test] = { + "success": 0, + "total": 0, + "description": yaml_content[category][test], + } + + test_path = test[0] + "/" + test + ".tests.yml" + if os.path.exists(test_path): + with open(test_path, "r") as test_file: + test_yaml_content = yaml.load_all(test_file, Loader=yaml.FullLoader) + + for test_case in test_yaml_content: + queries = test_case["sql"] + if not isinstance(queries, list): + queries = [queries] + + for query in queries: + # Example: E011-01 + test_group = "" + if "-" in test: + test_group = test.split("-", 1)[0] + summary["results"][category]["results"][test_group][ + "total" + ] += 1 + summary["results"][category]["results"][test]["total"] += 1 + summary["results"][category]["total"] += 1 + summary["total"] += 1 + + log_file.write(query + "\n") + + try: + result = client.execute(query, settings=settings) + log_file.write(str(result) + "\n") + + if test_group: + summary["results"][category]["results"][test_group][ + "success" + ] += 1 + summary["results"][category]["results"][test][ + "success" + ] += 1 + summary["results"][category]["success"] += 1 + summary["success"] += 1 + + except Exception as e: + log_file.write(f"Error occurred: {str(e)}\n") + +client.execute(f"DROP DATABASE {database_name}", settings=settings) + + +def enable_color(ratio): + if ratio == 0: + return "" + elif ratio < 0.5: + return "" + elif ratio < 1: + return "" + else: + return "" + + +reset_color = "" + + +def print_ratio(indent, name, success, total, description): + report_html_file.write( + "{}{}: {}{} / {} ({:.1%}){}{}\n".format( + " " * indent, + name.capitalize(), + enable_color(success / total), + success, + total, + success / total, + reset_color, + f" - " + html.escape(description) if description else "", + ) + ) + + +report_html_file.write( + "
\n"
+)
+
+print_ratio(0, "Total", summary["success"], summary["total"], "")
+
+for category in summary["results"]:
+    cat_summary = summary["results"][category]
+
+    if cat_summary["total"] == 0:
+        continue
+
+    print_ratio(2, category, cat_summary["success"], cat_summary["total"], "")
+
+    for test in summary["results"][category]["results"]:
+        test_summary = summary["results"][category]["results"][test]
+
+        if test_summary["total"] == 0:
+            continue
+
+        print_ratio(
+            6 if "-" in test else 4,
+            test,
+            test_summary["success"],
+            test_summary["total"],
+            test_summary["description"],
+        )
+
+report_html_file.write("
\n") diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index c973b6c6ec6..300d613e3b0 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -20,6 +20,22 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateful +# Setup a cluster for logs export to ClickHouse Cloud +# Note: these variables are provided to the Docker run command by the Python script in tests/ci +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + echo " + remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' + " > /etc/clickhouse-server/config.d/system_logs_export.yaml +fi + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -65,6 +81,22 @@ function start() } start + +# Initialize export of system logs to ClickHouse Cloud +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + # TODO: Check if the password will appear in the logs. + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + ./setup_export_logs.sh + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' +fi + # shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index e1e84c427ba..ab670209a69 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -41,6 +41,8 @@ RUN apt-get update -y \ zstd \ file \ pv \ + zip \ + p7zip-full \ && apt-get clean RUN pip3 install numpy scipy pandas Jinja2 @@ -52,7 +54,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 @@ -85,4 +87,5 @@ RUN npm install -g azurite \ COPY run.sh / COPY setup_minio.sh / COPY setup_hdfs_minicluster.sh / + CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3694fb7c2f6..015ba7d7423 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -36,6 +36,22 @@ fi ./setup_minio.sh stateless ./setup_hdfs_minicluster.sh +# Setup a cluster for logs export to ClickHouse Cloud +# Note: these variables are provided to the Docker run command by the Python script in tests/ci +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + echo " + remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' + " > /etc/clickhouse-server/config.d/system_logs_export.yaml +fi + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 @@ -92,7 +108,28 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -sleep 5 + +# Wait for the server to start, but not for too long. +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + +# Initialize export of system logs to ClickHouse Cloud +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + # TODO: Check if the password will appear in the logs. + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + ./setup_export_logs.sh + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' +fi attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 9217fcfddd9..72d124d63a6 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -51,8 +51,39 @@ configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment +# Setup a cluster for logs export to ClickHouse Cloud +# Note: these variables are provided to the Docker run command by the Python script in tests/ci +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + echo " +remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > /etc/clickhouse-server/config.d/system_logs_export.yaml +fi + start +# Initialize export of system logs to ClickHouse Cloud +if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] +then + export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + # TODO: Check if the password will appear in the logs. + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + ./setup_export_logs.sh + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' +fi + # shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse @@ -180,6 +211,11 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ + | sed "s|trace|test|" \ + > /etc/clickhouse-server/config.d/logger_trace.xml.tmp +mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml + start stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ @@ -233,4 +269,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index bd1c26855b7..a4feae27c67 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/style-test . -FROM ubuntu:20.04 +FROM ubuntu:22.04 ARG ACT_VERSION=0.2.33 ARG ACTIONLINT_VERSION=1.6.22 diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 13c352d5d41..5ae066cd9e2 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -63,6 +63,7 @@ configure # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml +rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -93,6 +94,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml +rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -231,4 +233,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docs/README.md b/docs/README.md index 0cd35a4e3ec..d1260312166 100644 --- a/docs/README.md +++ b/docs/README.md @@ -200,8 +200,8 @@ Templates: - [Server Setting](_description_templates/template-server-setting.md) - [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) -- [Data type](_description_templates/data-type.md) -- [Statement](_description_templates/statement.md) +- [Data type](_description_templates/template-data-type.md) +- [Statement](_description_templates/template-statement.md) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 5d4571aed9e..0ae77f464eb 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -36,6 +36,9 @@ then elif [ "${ARCH}" = "riscv64" ] then DIR="riscv64" + elif [ "${ARCH}" = "s390x" ] + then + DIR="s390x" fi elif [ "${OS}" = "FreeBSD" ] then diff --git a/docs/changelogs/v23.3.9.55-lts.md b/docs/changelogs/v23.3.9.55-lts.md new file mode 100644 index 00000000000..a08070892b5 --- /dev/null +++ b/docs/changelogs/v23.3.9.55-lts.md @@ -0,0 +1,45 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3) + +#### Performance Improvement +* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)). +* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.7.2.25-stable.md b/docs/changelogs/v23.7.2.25-stable.md new file mode 100644 index 00000000000..267083d8e03 --- /dev/null +++ b/docs/changelogs/v23.7.2.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.7.3.14-stable.md b/docs/changelogs/v23.7.3.14-stable.md new file mode 100644 index 00000000000..dbe76bd19e7 --- /dev/null +++ b/docs/changelogs/v23.7.3.14-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.3.14-stable (bd9a510550c) FIXME as compared to v23.7.2.25-stable (8dd1107b032) + +#### Build/Testing/Packaging Improvement +* Backported in [#53025](https://github.com/ClickHouse/ClickHouse/issues/53025): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/docs/changelogs/v23.7.4.5-stable.md b/docs/changelogs/v23.7.4.5-stable.md new file mode 100644 index 00000000000..c7926d79bde --- /dev/null +++ b/docs/changelogs/v23.7.4.5-stable.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.4.5-stable (bd2fcd44553) FIXME as compared to v23.7.3.14-stable (bd9a510550c) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/en/development/build.md b/docs/en/development/build.md index e3749608bbc..04dbc26aac1 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -14,6 +14,20 @@ Supported platforms: - PowerPC 64 LE (experimental) - RISC-V 64 (experimental) +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" +``` + ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. @@ -28,20 +42,20 @@ sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-rel ### Install and Use the Clang compiler -On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/). +On Ubuntu/Debian, you can use LLVM's automatic installation script; see [here](https://apt.llvm.org/). ``` bash sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` -Note: in case of troubles, you can also use this: +Note: in case of trouble, you can also use this: ```bash sudo apt-get install software-properties-common sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ``` -For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). +For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). As of April 2023, clang-16 or higher will work. GCC as a compiler is not supported. @@ -78,8 +92,12 @@ cmake -S . -B build cmake --build build # or: `cd build; ninja` ``` +:::tip +In case `cmake` isn't able to detect the number of available logical cores, the build will be done by one thread. To overcome this, you can tweak `cmake` to use a specific number of threads with `-j` flag, for example, `cmake --build build -j 16`. Alternatively, you can generate build files with a specific number of jobs in advance to avoid always setting the flag: `cmake -DPARALLEL_COMPILE_JOBS=16 -S . -B build`, where `16` is the desired number of threads. +::: + To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`). -This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments. +This will create an executable `build/programs/clickhouse`, which can be used with `client` or `server` arguments. ## Building on Any Linux {#how-to-build-clickhouse-on-any-linux} @@ -93,7 +111,7 @@ The build requires the following components: - Yasm - Gawk -If all the components are installed, you may build in the same way as the steps above. +If all the components are installed, you may build it in the same way as the steps above. Example for OpenSUSE Tumbleweed: @@ -109,7 +127,7 @@ Example for Fedora Rawhide: ``` bash sudo yum update -sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk +sudo yum --nogpg install git cmake make clang python3 ccache lld nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build cmake -S . -B build diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 738c5458cc3..c76ab738004 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s Runs [integration tests](tests.md#integration-tests). +## Bugfix validate check +Checks that either a new test (functional or integration) or there some changed tests that fail with the binary built on master branch. This check is triggered when pull request has "pr-bugfix" label. + + ## Stress Test Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails: diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index f7cc52e622e..b7e567c7b6c 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo * Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). * Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs). * Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias). - * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes) + * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL is using `SELECT ... FINAL` by default) * Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here. `INDEX ... TYPE hypothesis` as [described in the v21.12 blog post]](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/) diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 5672633c4a2..1f90f2acabf 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -35,7 +35,7 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log — it checks its local metadata with the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata). -[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`. +[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`. ## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index bd704d0e87e..b024820024a 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -60,6 +60,7 @@ Engines in the family: - [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) - [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) - [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) +- [S3Queue](../../engines/table-engines/integrations/s3queue.md) ### Special Engines {#special-engines} diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index 60e448377d0..3df08ee2ffb 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -21,7 +21,7 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32) - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](/docs/en/interfaces/formats.md) of the file. diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index b562e9d7fe6..964c952f31a 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -22,7 +22,7 @@ CREATE TABLE deltalake - `url` — Bucket url with path to the existing Delta Lake table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Example** diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index c60618af289..b2f599e5c92 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -22,7 +22,7 @@ CREATE TABLE hudi_table - `url` — Bucket url with the path to an existing Hudi table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. -Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Example** diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index b81d5624c1a..e4d3ac762ed 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -173,6 +173,7 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u cgrp smallest + 600 @@ -260,3 +261,4 @@ The number of rows in one Kafka message depends on whether the format is row-bas - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) - [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) +- [system.kafka_consumers](../../../operations/system-tables/kafka_consumers.md) diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index bccafd67c2c..47dae2ed494 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia ``` sql CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64) -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password') +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password') PRIMARY KEY key; ``` diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 051945538b2..2967a15494c 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -37,7 +37,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ### Engine parameters -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). @@ -164,6 +164,7 @@ For more information about virtual columns see [here](../../../engines/table-eng `path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). - `*` — Substitutes any number of any characters except `/` including empty string. +- `**` — Substitutes any number of any character include `/` including empty string. - `?` — Substitutes any single character. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. @@ -237,7 +238,7 @@ The following settings can be set before query execution or placed into configur - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. - `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`. -- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`. +- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`. - `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md new file mode 100644 index 00000000000..68d41d4b0a7 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -0,0 +1,225 @@ +--- +slug: /en/engines/table-engines/integrations/s3queue +sidebar_position: 7 +sidebar_label: S3Queue +--- + +# S3Queue Table Engine +This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import. This engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features. + +## Create Table {#creating-a-table} + +``` sql +CREATE TABLE s3_queue_engine_table (name String, value UInt32) + ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression]) + [SETTINGS] + [mode = 'unordered',] + [after_processing = 'keep',] + [keeper_path = '',] + [s3queue_loading_retries = 0,] + [s3queue_polling_min_timeout_ms = 1000,] + [s3queue_polling_max_timeout_ms = 10000,] + [s3queue_polling_backoff_ms = 0,] + [s3queue_tracked_files_limit = 1000,] + [s3queue_tracked_file_ttl_sec = 0,] + [s3queue_polling_size = 50,] +``` + +**Engine parameters** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). +- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `format` — The [format](../../../interfaces/formats.md#formats) of the file. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension. + +**Example** + +```sql +CREATE TABLE s3queue_engine_table (name String, value UInt32) +ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') +SETTINGS + mode = 'ordred'; +``` + +Using named collections: + +``` xml + + + + 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/* + test + test + + + +``` + +```sql +CREATE TABLE s3queue_engine_table (name String, value UInt32) +ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip') +SETTINGS + mode = 'ordred'; +``` + +## Settings {#s3queue-settings} + +### mode {#mode} + +Possible values: + +- unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. +- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. + +Default value: `unordered`. + +### after_processing {#after_processing} + +Delete or keep file after successful processing. +Possible values: + +- keep. +- delete. + +Default value: `keep`. + +### keeper_path {#keeper_path} + +The path in ZooKeeper can be specified as a table engine setting or default path can be formed from the global configuration-provided path and table UUID. +Possible values: + +- String. + +Default value: `/`. + +### s3queue_loading_retries {#s3queue_loading_retries} + +Retry file loading up to specified number of times. By default, there are no retries. +Possible values: + +- Positive integer. + +Default value: `0`. + +### s3queue_polling_min_timeout_ms {#s3queue_polling_min_timeout_ms} + +Minimal timeout before next polling (in milliseconds). + +Possible values: + +- Positive integer. + +Default value: `1000`. + +### s3queue_polling_max_timeout_ms {#s3queue_polling_max_timeout_ms} + +Maximum timeout before next polling (in milliseconds). + +Possible values: + +- Positive integer. + +Default value: `10000`. + +### s3queue_polling_backoff_ms {#s3queue_polling_backoff_ms} + +Polling backoff (in milliseconds). + +Possible values: + +- Positive integer. + +Default value: `0`. + +### s3queue_tracked_files_limit {#s3queue_tracked_files_limit} + +Allows to limit the number of Zookeeper nodes if the 'unordered' mode is used, does nothing for 'ordered' mode. +If limit reached the oldest processed files will be deleted from ZooKeeper node and processed again. + +Possible values: + +- Positive integer. + +Default value: `1000`. + +### s3queue_tracked_file_ttl_sec {#s3queue_tracked_file_ttl_sec} + +Maximum number of seconds to store processed files in ZooKeeper node (store forever by default) for 'unordered' mode, does nothing for 'ordered' mode. +After the specified number of seconds, the file will be re-imported. + +Possible values: + +- Positive integer. + +Default value: `0`. + +### s3queue_polling_size {#s3queue_polling_size} + +Maximum files to fetch from S3 with SELECT or in background task. +Engine takes files for processing from S3 in batches. +We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path. + +Possible values: + +- Positive integer. + +Default value: `50`. + + +## S3-related Settings {#s3-settings} + +Engine supports all s3 related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md). + + +## Description {#description} + +`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this: + +1. Use the engine to create a table for consuming from specified path in S3 and consider it a data stream. +2. Create a table with the desired structure. +3. Create a materialized view that converts data from the engine and puts it into a previously created table. + +When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. + +Example: + +``` sql + CREATE TABLE s3queue_engine_table (name String, value UInt32) + ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') + SETTINGS + mode = 'unordred', + keeper_path = '/clickhouse/s3queue/'; + + CREATE TABLE stats (name String, value UInt32) + ENGINE = MergeTree() ORDER BY name; + + CREATE MATERIALIZED VIEW consumer TO stats + AS SELECT name, value FROM s3queue_engine_table; + + SELECT * FROM stats ORDER BY name; +``` + +## Virtual columns {#virtual-columns} + +- `_path` — Path to the file. +- `_file` — Name of the file. + +For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). + + +## Wildcards In Path {#wildcards-in-path} + +`path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). + +- `*` — Substitutes any number of any characters except `/` including empty string. +- `**` — Substitutes any number of any characters include `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. + +Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. + +:::note +If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 4a4ebb47bdc..ee91794b20e 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -1,4 +1,4 @@ -# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex} +# Approximate Nearest Neighbor Search Indexes [experimental] Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the @@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows: ``` sql SELECT * -FROM table +FROM table_with_ann_index ORDER BY Distance(vectors, Point) LIMIT N ``` @@ -32,7 +32,7 @@ An alternative formulation of the nearest neighborhood search problem looks as f ``` sql SELECT * -FROM table +FROM table_with_ann_index WHERE Distance(vectors, Point) < MaxDistance LIMIT N ``` @@ -45,12 +45,12 @@ With brute force search, both queries are expensive (linear in the number of poi `Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time). -# Creating and Using ANN Indexes +# Creating and Using ANN Indexes {#creating_using_ann_indexes} Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Array(Float32), @@ -63,7 +63,7 @@ ORDER BY id; Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Tuple(Float32[, Float32[, ...]]), @@ -83,7 +83,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index [WHERE ...] ORDER BY Distance(vectors, Point) LIMIT N @@ -93,7 +93,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index WHERE Distance(vectors, Point) < MaxDistance LIMIT N ``` @@ -103,7 +103,7 @@ To avoid writing out large vectors, you can use [query parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g. ```bash -clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" +clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" ``` ::: @@ -138,17 +138,19 @@ back to a smaller `GRANULARITY` values only in case of problems like excessive m was specified for ANN indexes, the default value is 100 million. -# Available ANN Indexes +# Available ANN Indexes {#available_ann_indexes} - [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) +- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch) + ## Annoy {#annoy} Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently disabled on ARM due to memory safety problems with the algorithm. -This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the -space in random linear surfaces (lines in 2D, planes in 3D etc.). +This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random +linear surfaces (lines in 2D, planes in 3D etc.).
+
+ +Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column: + +```sql +CREATE TABLE table_with_usearch_index +( + id Int64, + vectors Array(Float32), + INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N] +) +ENGINE = MergeTree +ORDER BY id; +``` + +Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: + +```sql +CREATE TABLE table_with_usearch_index +( + id Int64, + vectors Tuple(Float32[, Float32[, ...]]), + INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N] +) +ENGINE = MergeTree +ORDER BY id; +``` + +USearch currently supports two distance functions: +- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space + ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). +- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors + ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). + +For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no +distance function was specified during index creation, `L2Distance` is used as default. diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index f7d84b9b452..a3bb11117cd 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i ::: ``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` ### Engine parameters: diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index f556df0a088..5a5e1564180 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 8779dd1a544..36afb94433a 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -323,9 +323,9 @@ clickhouse-client clickhouse://192.168.1.15,192.168.1.25 `clickhouse-client` uses the first existing file of the following: - Defined in the `--config-file` parameter. -- `./clickhouse-client.xml` -- `~/.clickhouse-client/config.xml` -- `/etc/clickhouse-client/config.xml` +- `./clickhouse-client.xml`, `.yaml`, `.yml` +- `~/.clickhouse-client/config.xml`, `.yaml`, `.yml` +- `/etc/clickhouse-client/config.xml`, `.yaml`, `.yml` Example of a config file: @@ -342,6 +342,17 @@ Example of a config file: ``` +Or the same config in a YAML format: + +```yaml +user: username +password: 'password' +secure: true +openSSL: + client: + caConfig: '/etc/ssl/cert.pem' +``` + ### Query ID Format {#query-id-format} In interactive mode `clickhouse-client` shows query ID for every query. By default, the ID is formatted like this: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 15f9d1f47bf..d059d158d54 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,82 +11,83 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: | Format | Input | Output | -|-------------------------------------------------------------------------------------------|------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | -| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | -| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | -| [SQLInsert](#sqlinsert) | ✗ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✔ | ✔ | -| [JSONAsString](#jsonasstring) | ✔ | ✗ | -| [JSONStrings](#jsonstrings) | ✔ | ✔ | -| [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | -| [JSONCompact](#jsoncompact) | ✔ | ✔ | -| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | -| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | -| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | -| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | -| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | -| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | -| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | -| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | -| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | -| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ | -| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | -| [Prometheus](#prometheus) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✔ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✔ | -| [Regexp](#data-format-regexp) | ✔ | ✗ | -| [RawBLOB](#rawblob) | ✔ | ✔ | -| [MsgPack](#msgpack) | ✔ | ✔ | -| [MySQLDump](#mysqldump) | ✔ | ✗ | -| [Markdown](#markdown) | ✗ | ✔ | +|-------------------------------------------------------------------------------------------|------|-------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | +| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | +| [SQLInsert](#sqlinsert) | ✗ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [JSON](#json) | ✔ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONStrings](#jsonstrings) | ✔ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | +| [JSONCompact](#jsoncompact) | ✔ | ✔ | +| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | +| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | +| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ | +| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | +| [Prometheus](#prometheus) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✔ | +| [One](#data-format-one) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✔ | +| [Regexp](#data-format-regexp) | ✔ | ✗ | +| [RawBLOB](#rawblob) | ✔ | ✔ | +| [MsgPack](#msgpack) | ✔ | ✔ | +| [MySQLDump](#mysqldump) | ✔ | ✗ | +| [Markdown](#markdown) | ✗ | ✔ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. @@ -195,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. +- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`. ## TabSeparatedRaw {#tabseparatedraw} @@ -472,7 +474,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. +- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} @@ -501,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings. -If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any. - -If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped. +Additional settings: +- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`. +- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file . Default value - `false`. +- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces). @@ -1261,6 +1264,7 @@ SELECT * FROM json_each_row_nested - [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. - [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`. +- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. @@ -1723,6 +1727,34 @@ You can select data from a ClickHouse table and save them into some file in the ``` bash $ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'" ``` + +### Using autogenerated schema {#using-autogenerated-capn-proto-schema} + +If you don't have an external CapnProto schema for your data, you can still output/input data in CapnProto format using autogenerated schema. +For example: + +```sql +SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1 +``` + +In this case ClickHouse will autogenerate CapnProto schema according to the table structure using function [structureToCapnProtoSchema](../sql-reference/functions/other-functions.md#structure_to_capn_proto_schema) and will use this schema to serialize data in CapnProto format. + +You can also read CapnProto file with autogenerated schema (in this case the file must be created using the same schema): + +```bash +$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_capn_proto_use_autogenerated_schema=1 FORMAT CapnProto" +``` + +The setting [format_capn_proto_use_autogenerated_schema](../operations/settings/settings-formats.md#format_capn_proto_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set. + +You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example: + +```sql +SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.capnp' +``` + +In this case autogenerated CapnProto schema will be saved in file `path/to/schema/schema.capnp`. + ## Prometheus {#prometheus} Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format). @@ -1861,6 +1893,33 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). +### Using autogenerated schema {#using-autogenerated-protobuf-schema} + +If you don't have an external Protobuf schema for your data, you can still output/input data in Protobuf format using autogenerated schema. +For example: + +```sql +SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1 +``` + +In this case ClickHouse will autogenerate Protobuf schema according to the table structure using function [structureToProtobufSchema](../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) and will use this schema to serialize data in Protobuf format. + +You can also read Protobuf file with autogenerated schema (in this case the file must be created using the same schema): + +```bash +$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_protobuf_use_autogenerated_schema=1 FORMAT Protobuf" +``` + +The setting [format_protobuf_use_autogenerated_schema](../operations/settings/settings-formats.md#format_protobuf_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set. + +You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example: + +```sql +SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.proto' +``` + +In this case autogenerated Protobuf schema will be saved in file `path/to/schema/schema.capnp`. + ## ProtobufSingle {#protobufsingle} Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. @@ -2080,6 +2139,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. Default value - `8192`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`. @@ -2281,7 +2341,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2347,7 +2406,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. - [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2355,6 +2413,34 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). +## One {#data-format-one} + +Special input format that doesn't read any data from file and returns only one row with column of type `UInt8`, name `dummy` and value `0` (like `system.one` table). +Can be used with virtual columns `_file/_path` to list all files without reading actual data. + +Example: + +Query: +```sql +SELECT _file FROM file('path/to/files/data*', One); +``` + +Result: +```text +┌─_file────┐ +│ data.csv │ +└──────────┘ +┌─_file──────┐ +│ data.jsonl │ +└────────────┘ +┌─_file────┐ +│ data.tsv │ +└──────────┘ +┌─_file────────┐ +│ data.parquet │ +└──────────────┘ +``` + ## LineAsString {#lineasstring} In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. diff --git a/docs/en/interfaces/images/mysql1.png b/docs/en/interfaces/images/mysql1.png new file mode 100644 index 00000000000..f5ac85b6e2c Binary files /dev/null and b/docs/en/interfaces/images/mysql1.png differ diff --git a/docs/en/interfaces/images/mysql2.png b/docs/en/interfaces/images/mysql2.png new file mode 100644 index 00000000000..7b999e41665 Binary files /dev/null and b/docs/en/interfaces/images/mysql2.png differ diff --git a/docs/en/interfaces/images/mysql3.png b/docs/en/interfaces/images/mysql3.png new file mode 100644 index 00000000000..be6cb963003 Binary files /dev/null and b/docs/en/interfaces/images/mysql3.png differ diff --git a/docs/en/interfaces/images/mysql4.png b/docs/en/interfaces/images/mysql4.png new file mode 100644 index 00000000000..3b5ce1e844d Binary files /dev/null and b/docs/en/interfaces/images/mysql4.png differ diff --git a/docs/en/interfaces/images/mysql5.png b/docs/en/interfaces/images/mysql5.png new file mode 100644 index 00000000000..fc026a8b753 Binary files /dev/null and b/docs/en/interfaces/images/mysql5.png differ diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index fab3ba42758..ce5ab24ecb0 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -6,7 +6,34 @@ sidebar_label: MySQL Interface # MySQL Interface -ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder: +ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)). + +## Enabling the MySQL Interface On ClickHouse Cloud + +1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab + +![Credentials screen - Prompt](./images/mysql1.png) + +2. Toggle the switch to enable the MySQL interface for this specific service. This will expose port `3306` for this service and prompt you with your MySQL connection screen that include your unique MySQL username. The password will be the same as the service's default user password. + +![Credentials screen - Enabled MySQL](./images/mysql2.png) + +Alternatively, in order to enable the MySQL interface for an existing service: + +1. Ensure your service is in `Running` state then click on the "View connection string" button for the service you want to enable the MySQL interface for + +![Connection screen - Prompt MySQL](./images/mysql3.png) + +2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password. + +![Connection screen - Prompt MySQL](./images/mysql4.png) + +3. After entering the password, you will get prompted the MySQL connection string for this service +![Connection screen - MySQL Enabled](./images/mysql5.png) + +## Enabling the MySQL Interface On Self-managed ClickHouse + +Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files): ``` xml @@ -20,7 +47,7 @@ Startup your ClickHouse server and look for a log message similar to the followi {} Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004 ``` -## Connect mysql to ClickHouse +## Connect MySQL to ClickHouse The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index 8f16dcf5f83..e60aff927c4 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -21,6 +21,11 @@ In most cases it is recommended to use an appropriate tool or library instead of - [ODBC driver](../interfaces/odbc.md) - [C++ client library](../interfaces/cpp.md) +ClickHouse server provides embedded visual interfaces for power users: + +- Play UI: open `/play` in the browser; +- Advanced Dashboard: open `/dashboard` in the browser; + There are also a wide range of third-party libraries for working with ClickHouse: - [Client libraries](../interfaces/third-party/client-libraries.md) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 3e1b1e84f5d..a9f1af93495 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -83,8 +83,8 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and - Python - [SQLAlchemy](https://www.sqlalchemy.org) - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pandas](https://pandas.pydata.org) - - [pandahouse](https://github.com/kszucs/pandahouse) + - [PyArrow/Pandas](https://pandas.pydata.org) + - [Ibis](https://github.com/ibis-project/ibis) - PHP - [Doctrine](https://www.doctrine-project.org/) - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 62f931a76b4..c3545a778b7 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,8 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables + - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. + - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` ### Usage examples diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 02f52b6f8bf..6ed72152c1e 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -169,7 +169,6 @@ host = '127.0.0.1', port = 3306, database = 'test', connection_pool_size = 8, -on_duplicate_clause = 1, replace_query = 1 ``` @@ -185,7 +184,6 @@ replace_query = 1 3306 test 8 - 1 1 diff --git a/docs/en/operations/optimizing-performance/profile-guided-optimization.md b/docs/en/operations/optimizing-performance/profile-guided-optimization.md new file mode 100644 index 00000000000..cda21e3c604 --- /dev/null +++ b/docs/en/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1,26 @@ +--- +slug: /en/operations/optimizing-performance/profile-guided-optimization +sidebar_position: 54 +sidebar_label: Profile Guided Optimization (PGO) +--- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + +# Profile Guided Optimization + +Profile-Guided Optimization (PGO) is a compiler optimization technique where a program is optimized based on the runtime profile. + +According to the tests, PGO helps with achieving better performance for ClickHouse. According to the tests, we see improvements up to 15% in QPS on the ClickBench test suite. The more detailed results are available [here](https://pastebin.com/xbue3HMU). The performance benefits depend on your typical workload - you can get better or worse results. + +More information about PGO in ClickHouse you can read in the corresponding GitHub [issue](https://github.com/ClickHouse/ClickHouse/issues/44567). + +## How to build ClickHouse with PGO? + +There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse. + +1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`. +2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments. +3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step. + +A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization). + +If you are going to collect a sample workload directly from a production environment, we recommend trying to use Sampling PGO. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index e9f0f0dae00..03cd56ef119 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -221,6 +221,10 @@ Default: 1024 Size of cache for index marks. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 @@ -230,6 +234,10 @@ Default: 0 Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 @@ -255,6 +263,10 @@ Default: SLRU Size of cache for marks (index of MergeTree family of tables). +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 5368709120 @@ -288,7 +300,7 @@ Default: 1000 Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -300,7 +312,7 @@ Default: 0 Limit on total number of concurrent insert queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -312,7 +324,7 @@ Default: 0 Limit on total number of concurrently select queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -456,6 +468,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 1000 @@ -605,6 +621,10 @@ There is one shared cache for the server. Memory is allocated on demand. The cac The uncompressed cache is advantageous for very short queries in individual cases. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 @@ -1640,7 +1660,7 @@ Keys for server/client settings: - verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. - verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). -- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`) - Supported OpenSSL encryptions. - cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. - sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. - sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. @@ -2288,6 +2308,8 @@ This section contains the following parameters: - `session_timeout_ms` — Maximum timeout for the client session in milliseconds. - `operation_timeout_ms` — Maximum timeout for one operation in milliseconds. - `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional. +- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours. +- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours. - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional. - zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection. * random - randomly selects one of ZooKeeper nodes. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 6863d7f3191..86d24c3a942 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,6 +7,10 @@ pagination_next: en/operations/settings/settings # Settings Overview +:::note +XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management). +::: + There are two main groups of ClickHouse settings: - Global server settings diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 4122b4af40f..8ea599b9861 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -56,11 +56,11 @@ Possible values: - Any positive integer. -Default value: 300. +Default value: 3000. To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree). -You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. +Prior to 23.6 this setting was set to 300. You can set a higher different value, it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. ## parts_to_delay_insert {#parts-to-delay-insert} diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 163ed5d5826..14d612be831 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -298,7 +298,7 @@ Default value: `THROW`. - [JOIN clause](../../sql-reference/statements/select/join.md#select-join) - [Join table engine](../../engines/table-engines/special/join.md) -## max_partitions_per_insert_block {#max-partitions-per-insert-block} +## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block} Limits the maximum number of partitions in a single inserted block. @@ -309,9 +309,18 @@ Default value: 100. **Details** -When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text: +When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text: -> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Allows you to control behaviour when `max_partitions_per_insert_block` is reached. + +- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised. +- `false` - Logs a warning when `max_partitions_per_insert_block` is reached. + +Default value: `true` ## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} @@ -327,3 +336,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all Zero means unlimited. Default value: 0. + +## max_sessions_for_user {#max-sessions-per-user} + +Maximum number of simultaneous sessions per authenticated user to the ClickHouse server. + +Example: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_user + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Default value: 0 (Infinite count of simultaneous sessions). diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index fb10ff7f61b..bd87d7485e0 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -321,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. +## output_format_schema {#output-format-schema} + +The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats. + ## output_format_enable_streaming {#output_format_enable_streaming} Enable streaming in output formats that support it. @@ -623,6 +627,13 @@ Column type should be String. If value is empty, default names `row_{i}`will be Default value: ''. +### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns} + +Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} @@ -760,6 +771,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped. Disabled by default. +### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns} + +Allow variable number of columns in rows in TSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## CSV format settings {#csv-format-settings} ### format_csv_delimiter {#format_csv_delimiter} @@ -951,9 +969,11 @@ Result ```text " string " ``` + ### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} -ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. +Allow variable number of columns in rows in CSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. Disabled by default. @@ -1108,17 +1128,6 @@ Default value: 1. ## Arrow format settings {#arrow-format-settings} -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} Ignore case when matching Arrow column names with ClickHouse column names. @@ -1168,17 +1177,6 @@ Default value: `lz4_frame`. ## ORC format settings {#orc-format-settings} -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} Batch size when reading ORC stripes. @@ -1217,17 +1215,6 @@ Default value: `none`. ## Parquet format settings {#parquet-format-settings} -### input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} Ignore case when matching Parquet column names with ClickHouse column names. @@ -1252,6 +1239,12 @@ Allow skipping columns with unsupported types while schema inference for format Disabled by default. +### input_format_parquet_local_file_min_bytes_for_seek {#input_format_parquet_local_file_min_bytes_for_seek} + +min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format. + +Default value - `8192`. + ### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} Use Parquet String type instead of Binary for String columns. @@ -1330,6 +1323,11 @@ When serializing Nullable columns with Google wrappers, serialize default values Disabled by default. +### format_protobuf_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) + ## Avro format settings {#avro-format-settings} ### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} @@ -1589,6 +1587,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format Disabled by default. +### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns} + +Allow variable number of columns in rows in CustomSeparated input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## Regexp format settings {#regexp-format-settings} ### format_regexp_escaping_rule {#format_regexp_escaping_rule} @@ -1626,6 +1631,11 @@ Possible values: Default value: `'by_values'`. +### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema) + ## MySQLDump format settings {#musqldump-format-settings} ### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 2f39a75453c..d08266b8ef8 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ Example: 8
- + 1000000000 100000000000 @@ -67,6 +67,8 @@ Example: 50 100 + 4 + 1 diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8dfb6c0d225..29434ef556f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -98,6 +98,18 @@ Default value: 0. ``` +## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator} + +If true constant nondeterministic functions (e.g. function `now()`) are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`. + +## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator} + +If true scalar subqueries are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. Default value: `false`. + +## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace} + +The maximum size of serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one the two settings above is enabled. Default value: 16384 (16 KiB). + ## distributed_product_mode {#distributed-product-mode} Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). @@ -3468,11 +3480,11 @@ Possible values: Default value: `0`. -## disable_url_encoding {#disable_url_encoding} +## enable_url_encoding {#enable_url_encoding} -Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. +Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. -Disabled by default. +Enabled by default. ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} @@ -4298,7 +4310,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} Sets the implicit time zone of the current session or query. -The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. +The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. The setting takes precedence over the globally configured (server-level) implicit time zone. A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). @@ -4333,7 +4345,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric ``` :::warning -Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. +Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. See the following example and explanation. ::: @@ -4578,3 +4590,39 @@ Type: Int64 Default: 0 +## rewrite_count_distinct_if_with_count_distinct_implementation + +Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting. + +Possible values: + +- true — Allow. +- false — Disallow. + +Default value: `false`. + +## precise_float_parsing {#precise_float_parsing} + +Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms: +* If the value is `1`, then precise method is used. It is slower than fast method, but it always returns a number that is the closest machine representable number to the input. +* Otherwise, fast method is used (default). It usually returns the same value as precise, but in rare cases result may differ by one or two least significant digits. + +Possible values: `0`, `1`. + +Default value: `0`. + +Example: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index deb9a0aaeb3..2659f80e338 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -23,6 +23,7 @@ Columns: - `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database). - `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database). - `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown". +- `name` ([String](../../sql-reference/data-types/string.md)) - An alias to cluster. **Example** diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md new file mode 100644 index 00000000000..7e28a251e26 --- /dev/null +++ b/docs/en/operations/system-tables/kafka_consumers.md @@ -0,0 +1,58 @@ +--- +slug: /en/operations/system-tables/kafka_consumers +--- +# kafka_consumers + +Contains information about Kafka consumers. +Applicable for [Kafka table engine](../../engines/table-engines/integrations/kafka) (native ClickHouse integration) + +Columns: + +- `database` (String) - database of the table with Kafka Engine. +- `table` (String) - name of the table with Kafka Engine. +- `consumer_id` (String) - Kafka consumer identifier. Note, that a table can have many consumers. Specified by `kafka_num_consumers` parameter. +- `assignments.topic` (Array(String)) - Kafka topic. +- `assignments.partition_id` (Array(Int32)) - Kafka partition id. Note, that only one consumer can be assigned to a partition. +- `assignments.current_offset` (Array(Int64)) - current offset. +- `exceptions.time`, (Array(DateTime)) - timestamp when the 10 most recent exceptions were generated. +- `exceptions.text`, (Array(String)) - text of 10 most recent exceptions. +- `last_poll_time`, (DateTime) - timestamp of the most recent poll. +- `num_messages_read`, (UInt64) - number of messages read by the consumer. +- `last_commit_time`, (DateTime) - timestamp of the most recent poll. +- `num_commits`, (UInt64) - total number of commits for the consumer. +- `last_rebalance_time`, (DateTime) - timestamp of the most recent Kafka rebalance +- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions +- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster +- `is_currently_used`, (UInt8) - consumer is in use +- `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 disable, default is 3000 (once in three seconds). + +Example: + +``` sql +SELECT * +FROM system.kafka_consumers +FORMAT Vertical +``` + +``` text +Row 1: +────── +database: test +table: kafka +consumer_id: ClickHouse-instance-test-kafka-1caddc7f-f917-4bb1-ac55-e28bd103a4a0 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [18446744073709550615] +exceptions.time: [] +exceptions.text: [] +last_poll_time: 2006-11-09 18:47:47 +num_messages_read: 4 +last_commit_time: 2006-11-10 04:39:40 +num_commits: 1 +last_rebalance_time: 1970-01-01 00:00:00 +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 1 +rdkafka_stat: {...} + +``` diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index c6f565b8748..7143520835f 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -48,7 +48,7 @@ Columns: - `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0. - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. - `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 77f816fe428..2140d22b620 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol. - `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. -- `-p N`, `--port=N` — Server port. Default value: 2181 +- `-p N`, `--port=N` — Server port. Default value: 9181 - `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s. - `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s. - `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s. @@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol. ## Example {#clickhouse-keeper-client-example} ```bash -./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 -Connected to ZooKeeper at [::1]:2181 with session_id 137 +./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 +Connected to ZooKeeper at [::1]:9181 with session_id 137 / :) ls keeper foo bar / :) cd keeper @@ -45,9 +45,14 @@ keeper foo bar - `ls [path]` -- Lists the nodes for the given path (default: cwd) - `cd [path]` -- Change the working path (default `.`) - `set [version]` -- Updates the node's value. Only update if version matches (default: -1) -- `create ` -- Creates new node +- `create [mode]` -- Creates new node with the set value +- `touch ` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists - `get ` -- Returns the node's value - `remove ` -- Remove the node - `rmr ` -- Recursively deletes path. Confirmation required - `flwc ` -- Executes four-letter-word command - `help` -- Prints this message +- `get_stat [path]` -- Returns the node's stat (default `.`) +- `find_super_nodes [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`) +- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive +- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 0443a80cf17..0b17afb7e12 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -34,7 +34,13 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. -If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews: +If the file is sitting on the same machine as `clickhouse-local`, you can simple specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews.tsv'" +``` + +This command is a shortcut of: ```bash ./clickhouse local -q "SELECT * FROM file('reviews.tsv')" diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index fe279edb709..c99c8791542 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -140,8 +140,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 3b2787008d2..1d37b829dd5 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -26,9 +26,9 @@ SELECT p, toTypeName(p) FROM geo_point; Result: ``` text -┌─p─────┬─toTypeName(p)─┐ +┌─p───────┬─toTypeName(p)─┐ │ (10,10) │ Point │ -└───────┴───────────────┘ +└─────────┴───────────────┘ ``` ## Ring diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index dfe0eda2e21..3add754e6b6 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(t1, T2, …) +# Tuple(T1, T2, …) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index b0f19f0d8be..40f756b9588 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -6,42 +6,42 @@ sidebar_label: UUID # UUID -A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). +A Universally Unique Identifier (UUID) is a 16-byte value used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). -The example of UUID type value is represented below: +While different UUID variants exist (see [here](https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis)), ClickHouse does not validate that inserted UUIDs conform to a particular variant. UUIDs are internally treated as a sequence of 16 random bytes with [8-4-4-4-12 representation](https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation) at SQL level. + +Example UUID value: ``` text 61f0c404-5cb3-11e7-907b-a6006ad3dba0 ``` -If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero: +The default UUID is all-zero. It is used, for example, when a new record is inserted but no value for a UUID column is specified: ``` text 00000000-0000-0000-0000-000000000000 ``` -## How to Generate +## Generating UUIDs -To generate the UUID value, ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function. +ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function to generate random UUID version 4 values. ## Usage Example **Example 1** -This example demonstrates creating a table with the UUID type column and inserting a value into the table. +This example demonstrates the creation of a table with a UUID column and the insertion of a value into the table. ``` sql CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog -``` -``` sql INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' -``` -``` sql SELECT * FROM t_uuid ``` +Result: + ``` text ┌────────────────────────────────────x─┬─y─────────┐ │ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ @@ -50,13 +50,11 @@ SELECT * FROM t_uuid **Example 2** -In this example, the UUID column value is not specified when inserting a new record. +In this example, no UUID column value is specified when the record is inserted, i.e. the default UUID value is inserted: ``` sql INSERT INTO t_uuid (y) VALUES ('Example 2') -``` -``` sql SELECT * FROM t_uuid ``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 6c3d80683db..dd8031461e0 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1092,7 +1092,7 @@ Types of sources (`source_type`): - [Local file](#local_file) - [Executable File](#executable) - [Executable Pool](#executable_pool) -- [HTTP(s)](#http) +- [HTTP(S)](#http) - DBMS - [ODBC](#odbc) - [MySQL](#mysql) @@ -1102,7 +1102,7 @@ Types of sources (`source_type`): - [Cassandra](#cassandra) - [PostgreSQL](#postgresql) -## Local File {#local_file} +### Local File {#local_file} Example of settings: @@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) -## Executable File {#executable} +### Executable File {#executable} Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. @@ -1161,7 +1161,7 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. -## Executable Pool {#executable_pool} +### Executable Pool {#executable_pool} Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. @@ -1196,9 +1196,9 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. -## Http(s) {#https} +### HTTP(S) {#https} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -1248,7 +1248,55 @@ Setting fields: When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server. -### Known Vulnerability of the ODBC Dictionary Functionality +### DBMS + +#### ODBC + +You can use this method to connect any database that has an ODBC driver. + +Example of settings: + +``` xml + + + DatabaseName + ShemaName.TableName
+ DSN=some_parameters + SQL_QUERY + SELECT id, value_1, value_2 FROM ShemaName.TableName +
+ +``` + +or + +``` sql +SOURCE(ODBC( + db 'DatabaseName' + table 'SchemaName.TableName' + connection_string 'DSN=some_parameters' + invalidate_query 'SQL_QUERY' + query 'SELECT id, value_1, value_2 FROM db_name.table_name' +)) +``` + +Setting fields: + +- `db` – Name of the database. Omit it if the database name is set in the `` parameters. +- `table` – Name of the table and schema if exists. +- `connection_string` – Connection string. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. + +:::note +The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. +::: + +ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. + +If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. + +##### Known Vulnerability of the ODBC Dictionary Functionality :::note When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. @@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. -### Example of Connecting Postgresql +##### Example of Connecting Postgresql Ubuntu OS. @@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360) You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`. -### Example of Connecting MS SQL Server +##### Example of Connecting MS SQL Server Ubuntu OS. @@ -1462,55 +1510,7 @@ LAYOUT(FLAT()) LIFETIME(MIN 300 MAX 360) ``` -## DBMS - -### ODBC - -You can use this method to connect any database that has an ODBC driver. - -Example of settings: - -``` xml - - - DatabaseName - ShemaName.TableName
- DSN=some_parameters - SQL_QUERY - SELECT id, value_1, value_2 FROM ShemaName.TableName -
- -``` - -or - -``` sql -SOURCE(ODBC( - db 'DatabaseName' - table 'SchemaName.TableName' - connection_string 'DSN=some_parameters' - invalidate_query 'SQL_QUERY' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `db` – Name of the database. Omit it if the database name is set in the `` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. - -:::note -The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. -::: - -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. - -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. - -### Mysql +#### Mysql Example of settings: @@ -1627,7 +1627,7 @@ SOURCE(MYSQL( )) ``` -### ClickHouse +#### ClickHouse Example of settings: @@ -1680,7 +1680,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -### Mongodb +#### Mongodb Example of settings: @@ -1723,7 +1723,7 @@ Setting fields: - `options` - MongoDB connection string options (optional parameter). -### Redis +#### Redis Example of settings: @@ -1756,7 +1756,7 @@ Setting fields: - `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. - `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. -### Cassandra +#### Cassandra Example of settings: @@ -1798,7 +1798,7 @@ Setting fields: The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. ::: -### PostgreSQL +#### PostgreSQL Example of settings: @@ -1855,7 +1855,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -## Null +### Null A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can useful for tests or with setups with separated data and query nodes at nodes with Distributed tables. @@ -2476,52 +2476,3 @@ Dictionary updates (other than loading at first use) do not block queries. Durin We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. - -## Embedded Dictionaries - - - -ClickHouse contains a built-in feature for working with a geobase. - -This allows you to: - -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. - -All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. - -The internal dictionaries are disabled in the default package. -To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file. - -The geobase is loaded from text files. - -Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory. - -Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory. - -You can also create these files yourself. The file format is as follows: - -`regions_hierarchy*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column - -`regions_names_*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. - -A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. - -Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated. -For updates, the file modification times are checked. If a file has changed, the dictionary is updated. -The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter. -Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. - -There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 44d385312d0..f66994ed5a6 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -183,9 +183,8 @@ arrayConcat(arrays) **Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. - **Example** - +**Example** ``` sql SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 87d84425029..3901ca9667a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1819,6 +1819,72 @@ Result: └────────────────────────────────────┘ ``` +## toUTCTimestamp + +Convert DateTime/DateTime64 type value from other time zone to UTC timezone timestamp + +**Syntax** + +``` sql +toUTCTimestamp(time_val, time_zone) +``` + +**Arguments** + +- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) + +**Returned value** + +- DateTime/DateTime64 in text form + +**Example** + +``` sql +SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai'); +``` + +Result: + +``` text +┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')┐ +│ 2023-03-15 16:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` + +## fromUTCTimestamp + +Convert DateTime/DateTime64 type value from UTC timezone to other time zone timestamp + +**Syntax** + +``` sql +fromUTCTimestamp(time_val, time_zone) +``` + +**Arguments** + +- `time_val` — A DateTime/DateTime64 type const value or a expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md) + +**Returned value** + +- DateTime/DateTime64 in text form + +**Example** + +``` sql +SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai'); +``` + +Result: + +``` text +┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐ +│ 2023-03-16 18:00:00.000 │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + ## Related content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index 73d72aa50e5..d62cd1db88d 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -18,7 +18,7 @@ file(path[, default]) **Arguments** -- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. +- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. - `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 06097d92480..556fe622c27 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 (#hash_functions-siphash64) +## sipHash64 {#hash_functions-siphash64} Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. @@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. -3. This calculation is repeated for all remaining hash values of the original input. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. **Arguments** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 26dcccfd42e..bfbd26551d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2552,3 +2552,187 @@ Result: This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables. +## structureToCapnProtoSchema {#structure_to_capn_proto_schema} + +Converts ClickHouse table structure to CapnProto schema. + +**Syntax** + +``` sql +structureToCapnProtoSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_struct_name` — Name for root struct in CapnProto schema. Default value - `Message`; + +**Returned value** + +- CapnProto schema + +Type: [String](../../sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xf96402dd754d0eb7; + +struct Message +{ + column1 @0 : Data; + column2 @1 : UInt32; + column3 @2 : List(Data); +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xd1c8320fecad2b7f; + +struct Message +{ + struct Column1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + column1 @0 : Column1; + struct Column2 + { + element1 @0 : UInt32; + element2 @1 : List(Data); + } + column2 @1 : Column2; + struct Column3 + { + struct Entry + { + key @0 : Data; + value @1 : Data; + } + entries @0 : List(Entry); + } + column3 @2 : Column3; +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +@0x96ab2d4ab133c6e1; + +struct Root +{ + column1 @0 : Data; + column2 @1 : UInt32; +} +``` + +## structureToProtobufSchema {#structure_to_protobuf_schema} + +Converts ClickHouse table structure to Protobuf schema. + +**Syntax** + +``` sql +structureToProtobufSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_message_name` — Name for root message in Protobuf schema. Default value - `Message`; + +**Returned value** + +- Protobuf schema + +Type: [String](../../sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + uint32 column2 = 2; + repeated bytes column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + message Column2 + { + uint32 element1 = 1; + repeated bytes element2 = 2; + } + Column2 column2 = 2; + map column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Root +{ + bytes column1 = 1; + uint32 column2 = 2; +} +``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 527ce2434c0..e39bdbae06e 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -729,6 +729,30 @@ Returns whether string `str` ends with `suffix`. endsWith(str, suffix) ``` +## endsWithUTF8 + +Returns whether string `str` ends with `suffix`, the difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` match `str` and `suffix` by UTF-8 characters. + +**Syntax** + +```sql +endsWithUTF8(str, suffix) +``` + +**Example** + +``` sql +SELECT endsWithUTF8('中国', '\xbd'), endsWith('中国', '\xbd') +``` + +Result: + +```result +┌─endsWithUTF8('中国', '½')─┬─endsWith('中国', '½')─┐ +│ 0 │ 1 │ +└──────────────────────────┴──────────────────────┘ +``` + ## startsWith Returns whether string `str` starts with `prefix`. @@ -745,6 +769,25 @@ startsWith(str, prefix) SELECT startsWith('Spider-Man', 'Spi'); ``` +## startsWithUTF8 + +Returns whether string `str` starts with `prefix`, the difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` match `str` and `suffix` by UTF-8 characters. + + +**Example** + +``` sql +SELECT startsWithUTF8('中国', '\xe4'), startsWith('中国', '\xe4') +``` + +Result: + +```result +┌─startsWithUTF8('中国', '⥩─┬─startsWith('中国', '⥩─┐ +│ 0 │ 1 │ +└────────────────────────────┴────────────────────────┘ +``` + ## trim Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32). diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 7ed2deaeda6..88e4ac03fdb 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -559,6 +559,29 @@ Result: └────────────────────────────┘ ``` +## tupleConcat + +Combines tuples passed as arguments. + +``` sql +tupleConcat(tuples) +``` + +**Arguments** + +- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. + +**Example** + +``` sql +SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res +``` + +``` text +┌─res──────────────────┐ +│ (1,2,3,4,true,false) │ +└──────────────────────┘ +``` ## Distance functions diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7dadc2be5b2..dca34d16f25 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control: [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not. +[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md). + ## Mutations `ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md new file mode 100644 index 00000000000..ac6752127c1 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -0,0 +1,30 @@ +--- +slug: /en/sql-reference/statements/alter/named-collection +sidebar_label: NAMED COLLECTION +--- + +# ALTER NAMED COLLECTION + +This query intends to modify already existing named collections. + +**Syntax** + +```sql +ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster] +[ SET +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... ] | +[ DELETE key_name4, key_name5, ... ] +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; + +ALTER NAMED COLLECTION foobar SET a = '2', c = '3'; + +ALTER NAMED COLLECTION foobar DELETE b; +``` diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 14e29d051d7..fa39526a53e 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,14 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](/docs/en/sql-reference/statements/create/database.md) +- [TABLE](/docs/en/sql-reference/statements/create/table.md) +- [VIEW](/docs/en/sql-reference/statements/create/view.md) +- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md) +- [FUNCTION](/docs/en/sql-reference/statements/create/function.md) +- [USER](/docs/en/sql-reference/statements/create/user.md) +- [ROLE](/docs/en/sql-reference/statements/create/role.md) +- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md) +- [QUOTA](/docs/en/sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md) +- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md new file mode 100644 index 00000000000..1fc7b11c554 --- /dev/null +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -0,0 +1,34 @@ +--- +slug: /en/sql-reference/statements/create/named-collection +sidebar_label: NAMED COLLECTION +--- + +# CREATE NAMED COLLECTION + +Creates a new named collection. + +**Syntax** + +```sql +CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +``` + +**Related statements** + +- [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection) +- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function) + + +**See Also** + +- [Named collections guide](/docs/en/operations/named-collections.md) diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index c69285171ab..a6ced870c18 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] + [IN access_storage_type] [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 9b14e220e1f..4b6fffe4f60 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 83bb2e6bb9a..cd7718793bd 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -16,6 +16,7 @@ Syntax: ``` sql CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...] + [IN access_storage_type] [FOR SELECT] USING condition [AS {PERMISSIVE | RESTRICTIVE}] [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}] diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 8e221a4d82f..d8afce9d6de 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -12,6 +12,7 @@ Syntax: ``` sql CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d168be63c36..11d4eae7bc8 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + [IN access_storage_type] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52..eed76dbcd5c 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -49,7 +49,7 @@ Deletes a user. Syntax: ``` sql -DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROLE @@ -59,7 +59,7 @@ Deletes a role. The deleted role is revoked from all the entities where it was a Syntax: ``` sql -DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROW POLICY @@ -69,7 +69,7 @@ Deletes a row policy. Deleted row policy is revoked from all the entities where Syntax: ``` sql -DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] +DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP QUOTA @@ -79,7 +79,7 @@ Deletes a quota. The deleted quota is revoked from all the entities where it was Syntax: ``` sql -DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP SETTINGS PROFILE @@ -89,7 +89,7 @@ Deletes a settings profile. The deleted settings profile is revoked from all the Syntax: ``` sql -DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP VIEW @@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b; DROP FUNCTION linear_equation; ``` + +## DROP NAMED COLLECTION + +Deletes a named collection. + +**Syntax** + +``` sql +DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster] +``` + +**Example** + +``` sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +DROP NAMED COLLECTION foobar; +``` diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index d6e30827f9b..e0cc98c2351 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -107,7 +107,7 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Columns are mapped according to their position in the SELECT clause. However, their names in the SELECT expression and the table for INSERT may differ. If necessary, type casting is performed. @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/move.md b/docs/en/sql-reference/statements/move.md new file mode 100644 index 00000000000..fac738ff711 --- /dev/null +++ b/docs/en/sql-reference/statements/move.md @@ -0,0 +1,32 @@ +--- +slug: /en/sql-reference/statements/move +sidebar_position: 54 +sidebar_label: MOVE +--- + +# MOVE access entity statement + +This statement allows to move an access entity from one access storage to another. + +Syntax: + +```sql +MOVE {USER, ROLE, QUOTA, SETTINGS PROFILE, ROW POLICY} name1 [, name2, ...] TO access_storage_type +``` + +Currently, there are five access storages in ClickHouse: + - `local_directory` + - `memory` + - `replicated` + - `users_xml` (ro) + - `ldap` (ro) + +Examples: + +```sql +MOVE USER test TO local_directory +``` + +```sql +MOVE ROLE test TO memory +``` diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index fb601cd5d35..443db7c5ac2 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name ## DROP DNS CACHE -Resets ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). +Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. ## DROP MARK CACHE -Resets the mark cache. +Clears the mark cache. ## DROP REPLICA @@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica ## DROP UNCOMPRESSED CACHE -Reset the uncompressed data cache. +Clears the uncompressed data cache. The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache). Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size). ## DROP COMPILED EXPRESSION CACHE -Reset the compiled expression cache. +Clears the compiled expression cache. The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions). ## DROP QUERY CACHE -Resets the [query cache](../../operations/query-cache.md). - -```sql -SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name] -``` +Clears the [query cache](../../operations/query-cache.md). ## FLUSH LOGS @@ -314,6 +310,22 @@ Provides possibility to start background fetch tasks from replication queues whi SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] ``` +### STOP PULLING REPLICATION LOG + +Stops loading new entries from replication log to replication queue in a `ReplicatedMergeTree` table. + +``` sql +SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + +### START PULLING REPLICATION LOG + +Cancels `SYSTEM STOP PULLING REPLICATION LOG`. + +``` sql +SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + ### SYNC REPLICA Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. @@ -427,9 +439,9 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU ``` - If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. -- If `QUERIES ALL` modifier is specified, all protocols are stopped. -- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped. -- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped. +- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. ### SYSTEM START LISTEN diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 457031a2157..4b46210aa09 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -4,8 +4,9 @@ sidebar_position: 52 sidebar_label: TRUNCATE --- -# TRUNCATE Statement +# TRUNCATE Statements +## TRUNCATE TABLE ``` sql TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` @@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute ` :::note If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: + +## TRUNCATE DATABASE +``` sql +TRUNCATE DATBASE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 7bb5d892c47..59c92e1327e 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -19,7 +19,7 @@ azureBlobStorage(- connection_string|storage_account_url, container_name, blobpa - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](../../interfaces/formats.md#formats) of the file. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md new file mode 100644 index 00000000000..20dfd35d5db --- /dev/null +++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md @@ -0,0 +1,47 @@ +--- +slug: /en/sql-reference/table-functions/azureBlobStorageCluster +sidebar_position: 55 +sidebar_label: azureBlobStorageCluster +title: "azureBlobStorageCluster Table Function" +--- + +Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md). + +**Syntax** + +``` sql +azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) +- `container_name` - Container name +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `account_name` - if storage_account_url is used, then account name can be specified here +- `account_key` - if storage_account_url is used, then account key can be specified here +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`). +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. + +**Examples** + +Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster: + +``` sql +SELECT count(*) from azureBlobStorageCluster( + 'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64') +``` + +**See Also** + +- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md) +- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md) diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 7362c433e0e..a083c6b89a6 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst **Syntax** ``` sql -cluster('cluster_name', db.table[, sharding_key]) -cluster('cluster_name', db, table[, sharding_key]) -clusterAllReplicas('cluster_name', db.table[, sharding_key]) -clusterAllReplicas('cluster_name', db, table[, sharding_key]) +cluster(['cluster_name', db.table, sharding_key]) +cluster(['cluster_name', db, table, sharding_key]) +clusterAllReplicas(['cluster_name', db.table, sharding_key]) +clusterAllReplicas(['cluster_name', db, table, sharding_key]) ``` **Arguments** -- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers, set `default` if not specified. - `db.table` or `db`, `table` - Name of a database and a table. - `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 00917414e0c..4db9494502e 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or **Syntax** ``` sql -file(path [,format] [,structure] [,compression]) +file([path_to_archive ::] path [,format] [,structure] [,compression]) ``` **Parameters** - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `path_to_archive` - The relative path to zip/tar/7z archive. Path to archive support the same globs as `path`. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. + **Returned value** A table with the specified structure for reading or writing data in the specified file. @@ -128,6 +130,11 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); └─────────┴─────────┴─────────┘ ``` +Getting data from table in table.csv, located in archive1.zip or/and archive2.zip +``` sql +SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); +``` + ## Globs in Path Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 01b4e4f6a69..48c2381696e 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 832be46d05f..75100eeb4f3 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -17,7 +17,7 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md index 30db0ef00aa..fa86b436a5e 100644 --- a/docs/en/sql-reference/table-functions/iceberg.md +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Returned value** diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index fba3ea55653..59ed4bf1985 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) ``` ## Parameters @@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) The port is required for an IPv6 address. + If only specify this parameter, `db` and `table` will use `system.one` by default. + Type: [String](../../sql-reference/data-types/string.md). - `db` — Database name. Type: [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 55c825b8b9b..07addafcf58 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index d5bdc85f9f8..675aef54d34 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 677ed011960..859de86f019 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,7 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. **See Also** diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 574d9273088..a4e58e66e0c 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -9,7 +9,7 @@ sidebar_label: Buffer Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы. ``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` Параметры движка: diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 479616d667b..00000000000 --- a/docs/ru/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /ru/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -См: http://dumps.wikimedia.org/other/pagecounts-raw/ - -Создание таблицы: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -Загрузка данных: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/ru/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index e232b63f049..5571936f4c5 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). - Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). - ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). - ### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index ba6b47116ad..48ee7c9f15d 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -88,7 +88,6 @@ SELECT * FROM s3_engine_table LIMIT 3; 3306 test 8 - 1 1 diff --git a/docs/ru/operations/optimizing-performance/profile-guided-optimization.md b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 81a696bcfc1..7b026244624 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1106,7 +1106,7 @@ ClickHouse использует потоки из глобального пул - verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: `none`, `relaxed`, `strict`, `once`. - verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. - loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: `true`, `false`. \| -- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с `sessionIdContext`. Допустимые значения: `true`, `false`. - sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать `SSL_MAX_SSL_SESSION_ID_LENGTH`. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию `${application.name}`. - sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index de9bb969085..fb3b18bd46a 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -311,6 +311,52 @@ FORMAT Null; **Подробности** -При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом: +При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. Если количество разделов превышает число `max_partitions_per_insert_block`, ClickHouse либо логирует предупреждение, либо выбрасывает исключение в зависимости от значения `throw_on_max_partitions_per_insert_block`. Исключения имеют следующий текст: -> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).» +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Позволяет контролировать поведение при достижении `max_partitions_per_insert_block` + +- `true` - Когда вставляемый блок достигает `max_partitions_per_insert_block`, возникает исключение. +- `false` - Записывает предупреждение при достижении `max_partitions_per_insert_block`. + +Значение по умолчанию: `true` + +## max_sessions_for_user {#max-sessions-per-user} + +Максимальное количество одновременных сессий на одного аутентифицированного пользователя. + +Пример: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_profile + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Значение по умолчанию: 0 (неограниченное количество сессий). diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md index ba2cb9a601f..0d094c637ac 100644 --- a/docs/ru/operations/settings/settings-profiles.md +++ b/docs/ru/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ SET profile = 'web' 8 - + 1000000000 100000000000 @@ -67,6 +67,7 @@ SET profile = 'web' 50 100 + 4 1 diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 957a917c780..d3db890ad7a 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение. -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_orc_import_nested {#input_format_orc_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md). @@ -4213,3 +4180,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi - Запрос: `SELECT * FROM file('sample.csv')` Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`. + +## precise_float_parsing {#precise_float_parsing} + +Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md): +* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению. +* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры. + +Возможные значения: `0`, `1`. + +Значение по умолчанию: `0`. + +Пример: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 4fa6ac4ce66..747e36b8809 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ sidebar_label: INSERT INTO **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -100,7 +100,7 @@ INSERT INTO t FORMAT TabSeparated **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Соответствие столбцов определяется их позицией в секции SELECT. При этом, их имена в выражении SELECT и в таблице для INSERT, могут отличаться. При необходимости выполняется приведение типов данных, эквивалентное соответствующему оператору CAST. @@ -120,7 +120,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`. diff --git a/docs/zh/engines/table-engines/special/buffer.md b/docs/zh/engines/table-engines/special/buffer.md index bb95ecdc583..f92a819f3c3 100644 --- a/docs/zh/engines/table-engines/special/buffer.md +++ b/docs/zh/engines/table-engines/special/buffer.md @@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer 缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。 - Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) + Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) 引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。 diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 4ce13b0f1d3..00000000000 --- a/docs/zh/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /zh/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -参考: http://dumps.wikimedia.org/other/pagecounts-raw/ - -创建表结构: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -加载数据: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/zh/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index eedc913cf82..3f42f3f8da4 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -346,9 +346,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 - 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 - - 正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 + 这些橙色标记的列值是每个颗粒中第一行的主键列值。正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 - 我们从0开始对行进行编号,以便与ClickHouse内部行编号方案对齐,该方案也用于记录消息。 ::: @@ -1071,13 +1069,6 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 ## 通过projections使用联合主键索引 -
Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: - -```sql -SET optimize_use_projections = 1; -``` - - 在原表上创建projection: ```sql ALTER TABLE hits_UserID_URL @@ -1096,10 +1087,12 @@ ALTER TABLE hits_UserID_URL :::note - 该projection正在创建一个隐藏表,该表的行顺序和主索引基于该projection的给定order BY子句 -- 我们使用MATERIALIZE关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 +- `SHOW TABLES` 语句查询是不会列出这个隐藏表的 +- 我们使用`MATERIALIZE`关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表 -- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: +- 请注意,投影(projections)不会使 `ORDER BY` 查询语句的效率更高,即使 `ORDER BY` 匹配上了 projection 的 `ORDER BY` 语句(请参阅:https://github.com/ClickHouse/ClickHouse/issues/47333) +- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: @@ -1163,7 +1156,7 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 ``` -## 移除无效的主键列 +## 小结 带有联合主键(UserID, URL)的表的主索引对于加快UserID的查询过滤非常有用。但是,尽管URL列是联合主键的一部分,但该索引在加速URL查询过滤方面并没有提供显著的帮助。 @@ -1176,4 +1169,12 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 但是,如果复合主键中的键列在基数上有很大的差异,那么查询按基数升序对主键列进行排序是有益的。 -主键键列之间的基数差越大,主键键列的顺序越重要。我们将在以后的文章中对此进行演示。请继续关注。 +主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。 + +# 高效地为键列排序 + +TODO + +# 高效地识别单行 + +TODO diff --git a/docs/zh/operations/optimizing-performance/profile-guided-optimization.md b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index f6106d8734e..8e2cb389f04 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -455,7 +455,7 @@ SSL客户端/服务器配置。 - verificationMode – The method for checking the node’s certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`. - sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 9acc1655f9a..f80c0a8a8ea 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -8,7 +8,7 @@ INSERT INTO 语句主要用于向系统中添加数据. 查询的基本格式: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` 您可以在查询中指定要插入的列的列表,如:`[(c1, c2, c3)]`。您还可以使用列[匹配器](../../sql-reference/statements/select/index.md#asterisk)的表达式,例如`*`和/或[修饰符](../../sql-reference/statements/select/index.md#select-modifiers),例如 [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier)。 @@ -71,7 +71,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set 例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... ``` ClickHouse会清除数据前所有的空白字符与一个换行符(如果有换行符的话)。所以在进行查询时,我们建议您将数据放入到输入输出格式名称后的新的一行中去(如果数据是以空白字符开始的,这将非常重要)。 @@ -93,7 +93,7 @@ INSERT INTO t FORMAT TabSeparated ### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` 写入与SELECT的列的对应关系是使用位置来进行对应的,尽管它们在SELECT表达式与INSERT中的名称可能是不同的。如果需要,会对它们执行对应的类型转换。 diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index e9c2e929755..3b77d968763 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -32,6 +32,12 @@ contents: dst: /usr/bin/clickhouse-keeper - src: clickhouse-keeper.service dst: /lib/systemd/system/clickhouse-keeper.service +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-converter + type: symlink # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-keeper/AUTHORS diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 018e88ef828..66299fddd4a 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -55,6 +55,9 @@ contents: - src: clickhouse dst: /usr/bin/clickhouse-keeper type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink - src: root/usr/bin/clickhouse-report dst: /usr/bin/clickhouse-report - src: root/usr/bin/clickhouse-server diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 466a0c194f7..c5acd10f791 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -1,8 +1,6 @@ #include #include -#include #include -#include #include #include #include @@ -18,9 +16,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -38,8 +34,6 @@ #include -namespace fs = std::filesystem; - /** A tool for evaluating ClickHouse performance. * The tool emulates a case with fixed amount of simultaneously executing queries. */ @@ -79,7 +73,6 @@ public: bool randomize_, size_t max_iterations_, double max_time_, - const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, @@ -98,7 +91,6 @@ public: cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), - json_path(json_path_), confidence(confidence_), query_id(query_id_), query_to_execute(query_to_execute_), @@ -165,9 +157,6 @@ public: int main(const std::vector &) override { - if (!json_path.empty() && fs::exists(json_path)) /// Clear file with previous results - fs::remove(json_path); - readQueries(); runBenchmark(); return 0; @@ -197,7 +186,6 @@ private: bool cumulative; size_t max_iterations; double max_time; - String json_path; size_t confidence; String query_id; String query_to_execute; @@ -226,26 +214,23 @@ private: size_t read_bytes = 0; size_t result_rows = 0; size_t result_bytes = 0; - double work_time = 0; using Sampler = ReservoirSampler; Sampler sampler {1 << 16}; - void add(double seconds, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) + void add(double duration, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) { ++queries; - work_time += seconds; read_rows += read_rows_inc; read_bytes += read_bytes_inc; result_rows += result_rows_inc; result_bytes += result_bytes_inc; - sampler.insert(seconds); + sampler.insert(duration); } void clear() { queries = 0; - work_time = 0; read_rows = 0; read_bytes = 0; result_rows = 0; @@ -331,10 +316,13 @@ private: return false; } - if (delay > 0 && delay_watch.elapsedSeconds() > delay) + double seconds = delay_watch.elapsedSeconds(); + if (delay > 0 && seconds > delay) { printNumberOfQueriesExecuted(queries_executed); - cumulative ? report(comparison_info_total) : report(comparison_info_per_interval); + cumulative + ? report(comparison_info_total, total_watch.elapsedSeconds()) + : report(comparison_info_per_interval, seconds); delay_watch.restart(); } } @@ -350,16 +338,7 @@ private: try { for (size_t i = 0; i < concurrency; ++i) - { - EntryPtrs connection_entries; - connection_entries.reserve(connections.size()); - - for (const auto & connection : connections) - connection_entries.emplace_back(std::make_shared( - connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); - - pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); }); - } + pool.scheduleOrThrowOnError([this]() mutable { thread(); }); } catch (...) { @@ -389,21 +368,18 @@ private: pool.wait(); total_watch.stop(); - if (!json_path.empty()) - reportJSON(comparison_info_total, json_path); - printNumberOfQueriesExecuted(queries_executed); - report(comparison_info_total); + report(comparison_info_total, total_watch.elapsedSeconds()); } - void thread(EntryPtrs & connection_entries) + void thread() { Query query; /// Randomly choosing connection index pcg64 generator(randomSeed()); - std::uniform_int_distribution distribution(0, connection_entries.size() - 1); + std::uniform_int_distribution distribution(0, connections.size() - 1); /// In these threads we do not accept INT signal. sigset_t sig_set; @@ -423,15 +399,13 @@ private: extracted = queue.tryPop(query, 100); if (shutdown || (max_iterations && queries_executed == max_iterations)) - { return; - } } const auto connection_index = distribution(generator); try { - execute(connection_entries, query, connection_index); + execute(query, connection_index); consecutive_errors = 0; } catch (...) @@ -460,17 +434,18 @@ private: } } - void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) + void execute(Query & query, size_t connection_index) { Stopwatch watch; - Connection & connection = **connection_entries[connection_index]; + ConnectionPool::Entry entry = connections[connection_index]->get( + ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)); if (reconnect) - connection.disconnect(); + entry->disconnect(); RemoteQueryExecutor executor( - connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) executor.setQueryId(query_id); @@ -485,19 +460,19 @@ private: executor.finish(); - double seconds = (display_client_side_time || progress.elapsed_ns == 0) + double duration = (display_client_side_time || progress.elapsed_ns == 0) ? watch.elapsedSeconds() : progress.elapsed_ns / 1e9; std::lock_guard lock(mutex); size_t info_index = round_robin ? 0 : connection_index; - comparison_info_per_interval[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - comparison_info_total[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - t_test.add(info_index, seconds); + comparison_info_per_interval[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + comparison_info_total[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + t_test.add(info_index, duration); } - void report(MultiStats & infos) + void report(MultiStats & infos, double seconds) { std::lock_guard lock(mutex); @@ -510,8 +485,6 @@ private: if (0 == info->queries) return; - double seconds = info->work_time / concurrency; - std::string connection_description = connections[i]->getDescription(); if (round_robin) { @@ -525,10 +498,10 @@ private: } std::cerr << connection_description << ", " - << "queries " << info->queries << ", "; + << "queries: " << info->queries << ", "; if (info->errors) { - std::cerr << "errors " << info->errors << ", "; + std::cerr << "errors: " << info->errors << ", "; } std::cerr << "QPS: " << (info->queries / seconds) << ", " @@ -567,62 +540,6 @@ private: } } - void reportJSON(MultiStats & infos, const std::string & filename) - { - WriteBufferFromFile json_out(filename); - - std::lock_guard lock(mutex); - - auto print_key_value = [&](auto key, auto value, bool with_comma = true) - { - json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n"); - }; - - auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true) - { - json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n"); - }; - - json_out << "{\n"; - - for (size_t i = 0; i < infos.size(); ++i) - { - const auto & info = infos[i]; - - json_out << double_quote << connections[i]->getDescription() << ": {\n"; - json_out << double_quote << "statistics" << ": {\n"; - - double seconds = info->work_time / concurrency; - - print_key_value("QPS", info->queries.load() / seconds); - print_key_value("RPS", info->read_rows / seconds); - print_key_value("MiBPS", info->read_bytes / seconds / 1048576); - print_key_value("RPS_result", info->result_rows / seconds); - print_key_value("MiBPS_result", info->result_bytes / seconds / 1048576); - print_key_value("num_queries", info->queries.load()); - print_key_value("num_errors", info->errors, false); - - json_out << "},\n"; - json_out << double_quote << "query_time_percentiles" << ": {\n"; - - if (info->queries != 0) - { - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(*info, percent); - - print_percentile(*info, 95); - print_percentile(*info, 99); - print_percentile(*info, 99.9); - print_percentile(*info, 99.99, false); - } - - json_out << "}\n"; - json_out << (i == infos.size() - 1 ? "}\n" : "},\n"); - } - - json_out << "}\n"; - } - public: ~Benchmark() override @@ -675,7 +592,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("iterations,i", value()->default_value(0), "amount of queries to be executed") ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") ("randomize,r", "randomize order of execution") - ("json", value()->default_value(""), "write final report to specified file in JSON format") ("host,h", value()->multitoken(), "list of hosts") ("port", value()->multitoken(), "list of ports") ("roundrobin", "Instead of comparing queries for different --host/--port just pick one random --host/--port for every query and send query to it.") @@ -739,7 +655,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options.count("randomize"), options["iterations"].as(), options["timelimit"].as(), - options["json"].as(), options["confidence"].as(), options["query_id"].as(), options["query"].as(), diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index efe7121cace..556eca808f6 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -608,6 +608,8 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") << partition_name; UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); + if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) + return TaskStatus::Error; LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 52d1a2196a9..86188fb6db1 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,4 +1,6 @@ #include "ICommand.h" +#include + namespace DB { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 43c66a32302..d7d61bbcd3b 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -163,13 +163,15 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); - /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail, - /// should throw exception early and make exception message more readable. + + /// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure, + /// we should throw an exception early, and make exception message more readable. if (const auto * insert_query = res->as(); insert_query && insert_query->data) { throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, "Can't format ASTInsertQuery with data, since data will be lost"); } + if (!quiet) { if (!backslash) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d7086c95beb..e10a9fea86b 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -997,7 +997,9 @@ namespace { /// sudo respects limits in /etc/security/limits.conf e.g. open files, /// that's why we are using it instead of the 'clickhouse su' tool. - command = fmt::format("sudo -u '{}' {}", user, command); + /// by default, sudo resets all the ENV variables, but we should preserve + /// the values /etc/default/clickhouse in /etc/init.d/clickhouse file + command = fmt::format("sudo --preserve-env -u '{}' {}", user, command); } fmt::print("Will run {}\n", command); diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 05928a0d20b..c822a631798 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -1,5 +1,6 @@ #include "Commands.h" +#include #include "KeeperClient.h" @@ -24,8 +25,18 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con else path = client->cwd; - for (const auto & child : client->zookeeper->getChildren(path)) - std::cout << child << " "; + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + + bool need_space = false; + for (const auto & child : children) + { + if (std::exchange(need_space, true)) + std::cout << " "; + + std::cout << child; + } + std::cout << "\n"; } @@ -77,7 +88,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co client->zookeeper->set( client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet(), - static_cast(query->args[2].safeGet())); + static_cast(query->args[2].get())); } bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -115,6 +126,21 @@ void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) static_cast(query->args[2].safeGet())); } +bool TouchCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + return true; +} + +void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + client->zookeeper->createIfNotExists(client->getAbsolutePath(query->args[0].safeGet()), ""); +} + bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; @@ -130,6 +156,173 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; } +bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return true; + + node->args.push_back(std::move(arg)); + return true; +} + +void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + Coordination::Stat stat; + String path; + if (!query->args.empty()) + path = client->getAbsolutePath(query->args[0].safeGet()); + else + path = client->cwd; + + client->zookeeper->get(path, &stat); + + std::cout << "cZxid = " << stat.czxid << "\n"; + std::cout << "mZxid = " << stat.mzxid << "\n"; + std::cout << "pZxid = " << stat.pzxid << "\n"; + std::cout << "ctime = " << stat.ctime << "\n"; + std::cout << "mtime = " << stat.mtime << "\n"; + std::cout << "version = " << stat.version << "\n"; + std::cout << "cversion = " << stat.cversion << "\n"; + std::cout << "aversion = " << stat.aversion << "\n"; + std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n"; + std::cout << "dataLength = " << stat.dataLength << "\n"; + std::cout << "numChildren = " << stat.numChildren << "\n"; +} + +bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + ASTPtr threshold; + if (!ParserUnsignedInteger{}.parse(pos, threshold, expected)) + return false; + + node->args.push_back(threshold->as().value); + + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + return true; +} + +void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto threshold = query->args[0].safeGet(); + auto path = client->getAbsolutePath(query->args[1].safeGet()); + + Coordination::Stat stat; + client->zookeeper->get(path, &stat); + + if (stat.numChildren >= static_cast(threshold)) + { + std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; + return; + } + + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + for (const auto & child : children) + { + auto next_query = *query; + next_query.args[1] = DB::Field(path / child); + execute(&next_query, client); + } +} + +bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const +{ + return true; +} + +void DeleteStaleBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const +{ + client->askConfirmation( + "You are going to delete all inactive backups in /clickhouse/backups.", + [client] + { + fs::path backup_root = "/clickhouse/backups"; + auto backups = client->zookeeper->getChildren(backup_root); + std::sort(backups.begin(), backups.end()); + + for (const auto & child : backups) + { + auto backup_path = backup_root / child; + std::cout << "Found backup " << backup_path << ", checking if it's active\n"; + + String stage_path = backup_path / "stage"; + auto stages = client->zookeeper->getChildren(stage_path); + + bool is_active = false; + for (const auto & stage : stages) + { + if (startsWith(stage, "alive")) + { + is_active = true; + break; + } + } + + if (is_active) + { + std::cout << "Backup " << backup_path << " is active, not going to delete\n"; + continue; + } + + std::cout << "Backup " << backup_path << " is not active, deleting it\n"; + client->zookeeper->removeRecursive(backup_path); + } + }); +} + +bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + + ASTPtr count; + if (ParserUnsignedInteger{}.parse(pos, count, expected)) + node->args.push_back(count->as().value); + else + node->args.push_back(UInt64(10)); + + return true; +} + +void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto path = client->getAbsolutePath(query->args[0].safeGet()); + auto n = query->args[1].safeGet(); + + std::vector> result; + + std::queue queue; + queue.push(path); + while (!queue.empty()) + { + auto next_path = queue.front(); + queue.pop(); + + auto children = client->zookeeper->getChildren(next_path); + std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; }); + + auto response = client->zookeeper->get(children); + + for (size_t i = 0; i < response.size(); ++i) + { + result.emplace_back(response[i].stat.numChildren, children[i]); + queue.push(children[i]); + } + } + + std::sort(result.begin(), result.end(), std::greater()); + for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) + std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n"; +} + bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; @@ -170,7 +363,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptrgetHelpMessage() << "\n"; + std::cout << pair.second->generateHelpString() << "\n"; } bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h index e4debd53e42..7d64fef1738 100644 --- a/programs/keeper-client/Commands.h +++ b/programs/keeper-client/Commands.h @@ -21,6 +21,12 @@ public: virtual String getName() const = 0; virtual ~IKeeperClientCommand() = default; + + String generateHelpString() const + { + return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName())); + } + }; using Command = std::shared_ptr; @@ -34,7 +40,7 @@ class LSCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; } + String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; } }; class CDCommand : public IKeeperClientCommand @@ -45,7 +51,7 @@ class CDCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; } + String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; } }; class SetCommand : public IKeeperClientCommand @@ -58,7 +64,7 @@ class SetCommand : public IKeeperClientCommand String getHelpMessage() const override { - return "set [version] -- Updates the node's value. Only update if version matches (default: -1)"; + return "{} [version] -- Updates the node's value. Only update if version matches (default: -1)"; } }; @@ -70,7 +76,18 @@ class CreateCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "create -- Creates new node"; } + String getHelpMessage() const override { return "{} [mode] -- Creates new node with the set value"; } +}; + +class TouchCommand : public IKeeperClientCommand +{ + String getName() const override { return "touch"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists"; } }; class GetCommand : public IKeeperClientCommand @@ -81,9 +98,63 @@ class GetCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "get -- Returns the node's value"; } + String getHelpMessage() const override { return "{} -- Returns the node's value"; } }; +class GetStatCommand : public IKeeperClientCommand +{ + String getName() const override { return "get_stat"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; } +}; + +class FindSuperNodes : public IKeeperClientCommand +{ + String getName() const override { return "find_super_nodes"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)"; + } +}; + +class DeleteStaleBackups : public IKeeperClientCommand +{ + String getName() const override { return "delete_stale_backups"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} -- Deletes ClickHouse nodes used for backups that are now inactive"; + } +}; + +class FindBigFamily : public IKeeperClientCommand +{ + String getName() const override { return "find_big_family"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)"; + } +}; + + class RMCommand : public IKeeperClientCommand { String getName() const override { return "rm"; } @@ -92,7 +163,7 @@ class RMCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "remove -- Remove the node"; } + String getHelpMessage() const override { return "{} -- Remove the node"; } }; class RMRCommand : public IKeeperClientCommand @@ -103,7 +174,7 @@ class RMRCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "rmr -- Recursively deletes path. Confirmation required"; } + String getHelpMessage() const override { return "{} -- Recursively deletes path. Confirmation required"; } }; class HelpCommand : public IKeeperClientCommand @@ -114,7 +185,7 @@ class HelpCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "help -- Prints this message"; } + String getHelpMessage() const override { return "{} -- Prints this message"; } }; class FourLetterWordCommand : public IKeeperClientCommand @@ -125,7 +196,7 @@ class FourLetterWordCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "flwc -- Executes four-letter-word command"; } + String getHelpMessage() const override { return "{} -- Executes four-letter-word command"; } }; } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index f41dca1e27a..893be4d25d1 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -131,7 +131,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) .binding("host")); options.addOption( - Poco::Util::Option("port", "p", "server port. default `2181`") + Poco::Util::Option("port", "p", "server port. default `9181`") .argument("") .binding("port")); @@ -176,7 +176,12 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), @@ -266,8 +271,16 @@ void KeeperClient::runInteractive() LineReader::Patterns query_extenders = {"\\"}; LineReader::Patterns query_delimiters = {}; + char word_break_characters[] = " \t\v\f\a\b\r\n/"; - ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {}); + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters, + /* highlighter_= */ {}); lr.enableBracketedPaste(); while (true) @@ -299,7 +312,7 @@ int KeeperClient::main(const std::vector & /* args */) } auto host = config().getString("host", "localhost"); - auto port = config().getString("port", "2181"); + auto port = config().getString("port", "9181"); zk_args.hosts = {host + ":" + port}; zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 43a8d84b513..a43a312ba54 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -57,6 +57,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a38467c3369..22f0b2c2ac6 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -110,19 +110,18 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam } catch (const Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -150,7 +149,7 @@ int Keeper::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } @@ -288,13 +287,27 @@ try std::string path; if (config().has("keeper_server.storage_path")) + { path = config().getString("keeper_server.storage_path"); + } else if (config().has("keeper_server.log_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); + } else if (config().has("keeper_server.snapshot_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } else - path = std::filesystem::path{KEEPER_DEFAULT_PATH}; + { + path = KEEPER_DEFAULT_PATH; + } std::filesystem::create_directories(path); @@ -330,6 +343,7 @@ try auto global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::KEEPER); global_context->setPath(path); global_context->setRemoteHostFilter(config()); @@ -365,7 +379,7 @@ try } /// Initialize keeper RAFT. Do nothing if no keeper_server in config. - global_context->initializeKeeperDispatcher(/* start_async = */ true); + global_context->initializeKeeperDispatcher(/* start_async = */ false); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [&] () -> const Poco::Util::AbstractConfiguration & diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 587c88a2745..2ba4d245f21 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -655,43 +657,61 @@ void LocalServer::processConfig() /// There is no need for concurrent queries, override max_concurrent_queries. global_context->getProcessList().setMaxSize(0); - /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); - if (uncompressed_cache_size) - global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); + const size_t memory_amount = getMemoryAmount(); + const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); + const size_t max_cache_size = static_cast(memory_amount * cache_size_to_ram_max_ratio); - /// Size of cache for marks (index of MergeTree family of tables). - String mark_cache_policy = config().getString("mark_cache_policy", ""); - size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); - if (mark_cache_size) - global_context->setMarkCache(mark_cache_policy, mark_cache_size); + String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY); + size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); + if (uncompressed_cache_size > max_cache_size) + { + uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); - /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. - size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0); - if (index_uncompressed_cache_size) - global_context->setIndexUncompressedCache(index_uncompressed_cache_size); + String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY); + size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + if (!mark_cache_size) + LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); + if (mark_cache_size > max_cache_size) + { + mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + } + global_context->setMarkCache(mark_cache_policy, mark_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); - if (index_mark_cache_size) - global_context->setIndexMarkCache(index_mark_cache_size); + size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// A cache for mmapped files. - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary. - if (mmap_cache_size) - global_context->setMMappedFileCache(mmap_cache_size); + size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexMarkCache(index_mark_cache_size); + + size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setMMappedFileCache(mmap_cache_size); + + /// Initialize a dummy query cache. + global_context->setQueryCache(0, 0, 0, 0); #if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size - = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif /// NOTE: it is important to apply any overrides before diff --git a/programs/main.cpp b/programs/main.cpp index 9a3ad47a86e..4af9e3a3067 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -466,6 +466,11 @@ int main(int argc_, char ** argv_) checkHarmfulEnvironmentVariables(argv_); #endif + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. std::set_new_handler(nullptr); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 3042ae2bb57..21471bce8a9 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -365,17 +365,14 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI hash.update(seed); hash.update(i); + const auto checksum = getSipHash128AsArray(hash); if (size >= 16) { - char * hash_dst = reinterpret_cast(std::min(pos, end - 16)); - hash.get128(hash_dst); + auto * hash_dst = std::min(pos, end - 16); + memcpy(hash_dst, checksum.data(), checksum.size()); } else - { - char value[16]; - hash.get128(value); - memcpy(dst, value, end - dst); - } + memcpy(dst, checksum.data(), end - dst); pos += 16; ++i; @@ -393,7 +390,10 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) { - const UInt128 & src = src_uuid.toUnderType(); + auto src_copy = src_uuid; + transformEndianness(src_copy); + + const UInt128 & src = src_copy.toUnderType(); UInt128 & dst = dst_uuid.toUnderType(); SipHash hash; @@ -401,10 +401,11 @@ static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) hash.update(reinterpret_cast(&src), sizeof(UUID)); /// Saving version and variant from an old UUID - hash.get128(reinterpret_cast(&dst)); + dst = hash.get128(); - dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull); - dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull); + const UInt64 trace[2] = {0x000000000000f000ull, 0xe000000000000000ull}; + UUIDHelpers::getLowBytes(dst_uuid) = (UUIDHelpers::getLowBytes(dst_uuid) & (0xffffffffffffffffull - trace[1])) | (UUIDHelpers::getLowBytes(src_uuid) & trace[1]); + UUIDHelpers::getHighBytes(dst_uuid) = (UUIDHelpers::getHighBytes(dst_uuid) & (0xffffffffffffffffull - trace[0])) | (UUIDHelpers::getHighBytes(src_uuid) & trace[0]); } class FixedStringModel : public IModel diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 796ab583fe4..d87b308c340 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -325,19 +326,18 @@ void Server::createServer( } catch (const Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -389,7 +389,7 @@ int Server::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } return Application::run(); // NOLINT @@ -658,10 +658,10 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif - const auto memory_amount = getMemoryAmount(); + const size_t physical_server_memory = getMemoryAmount(); LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(physical_server_memory), getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores std::thread::hardware_concurrency()); @@ -1105,6 +1105,69 @@ try if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); + /// Set up caches. + + const size_t max_cache_size = static_cast(physical_server_memory * server_settings.cache_size_to_ram_max_ratio); + + String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; + size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; + if (uncompressed_cache_size > max_cache_size) + { + uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); + + String mark_cache_policy = server_settings.mark_cache_policy; + size_t mark_cache_size = server_settings.mark_cache_size; + if (mark_cache_size > max_cache_size) + { + mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + } + global_context->setMarkCache(mark_cache_policy, mark_cache_size); + + size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexUncompressedCache(index_uncompressed_cache_size); + + size_t index_mark_cache_size = server_settings.index_mark_cache_size; + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setIndexMarkCache(index_mark_cache_size); + + size_t mmap_cache_size = server_settings.mmap_cache_size; + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setMMappedFileCache(mmap_cache_size); + + size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + if (query_cache_max_size_in_bytes > max_cache_size) + { + query_cache_max_size_in_bytes = max_cache_size; + LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); + +#if USE_EMBEDDED_COMPILER + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); +#endif + /// Initialize main config reloader. std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); @@ -1136,9 +1199,10 @@ try server_settings_.loadSettingsFromConfig(*config); size_t max_server_memory_usage = server_settings_.max_server_memory_usage; - double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio; - size_t default_max_server_memory_usage = static_cast(memory_amount * max_server_memory_usage_to_ram_ratio); + + size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically. + size_t default_max_server_memory_usage = static_cast(current_physical_server_memory * max_server_memory_usage_to_ram_ratio); if (max_server_memory_usage == 0) { @@ -1146,7 +1210,7 @@ try LOG_INFO(log, "Setting max_server_memory_usage was set to {}" " ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } else if (max_server_memory_usage > default_max_server_memory_usage) @@ -1157,7 +1221,7 @@ try " calculated as {} available" " * {:.2f} max_server_memory_usage_to_ram_ratio", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } @@ -1167,14 +1231,14 @@ try size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; - size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + size_t default_merges_mutations_server_memory_usage = static_cast(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio); if (merges_mutations_memory_usage_soft_limit == 0) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) @@ -1183,7 +1247,7 @@ try LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } @@ -1323,7 +1387,14 @@ try global_context->updateStorageConfiguration(*config); global_context->updateInterserverCredentials(*config); + + global_context->updateUncompressedCacheConfiguration(*config); + global_context->updateMarkCacheConfiguration(*config); + global_context->updateIndexUncompressedCacheConfiguration(*config); + global_context->updateIndexMarkCacheConfiguration(*config); + global_context->updateMMappedFileCacheConfiguration(*config); global_context->updateQueryCacheConfiguration(*config); + CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); #if USE_SSL CertificateReloader::instance().tryLoad(*config); @@ -1483,21 +1554,6 @@ try /// Limit on total number of concurrently executed queries. global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); - /// Set up caches. - - size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio); - - String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; - LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); - size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; - if (uncompressed_cache_size > max_cache_size) - { - uncompressed_cache_size = max_cache_size; - LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); - } - global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); - /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); @@ -1513,46 +1569,16 @@ try server_settings.async_insert_queue_flush_on_shutdown)); } - size_t mark_cache_size = server_settings.mark_cache_size; - String mark_cache_policy = server_settings.mark_cache_policy; - if (!mark_cache_size) - LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); - if (mark_cache_size > max_cache_size) - { - mark_cache_size = max_cache_size; - LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(mark_cache_size)); - } - global_context->setMarkCache(mark_cache_policy, mark_cache_size); - - if (server_settings.index_uncompressed_cache_size) - global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size); - - if (server_settings.index_mark_cache_size) - global_context->setIndexMarkCache(server_settings.index_mark_cache_size); - - if (server_settings.mmap_cache_size) - global_context->setMMappedFileCache(server_settings.mmap_cache_size); - - /// A cache for query results. - global_context->setQueryCache(config()); - -#if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); -#endif - /// Set path for format schema files fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/")); global_context->setFormatSchemaPath(format_schema_path); fs::create_directories(format_schema_path); + /// Set path for filesystem caches + fs::path filesystem_caches_path(config().getString("filesystem_caches_path", "")); + if (!filesystem_caches_path.empty()) + global_context->setFilesystemCachesPath(filesystem_caches_path); + /// Check sanity of MergeTreeSettings on server startup { size_t background_pool_tasks = global_context->getMergeMutateExecutor()->getMaxTasksCount(); @@ -1650,6 +1676,10 @@ try database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); maybeConvertSystemDatabase(global_context); + /// This has to be done before the initialization of system logs, + /// otherwise there is a race condition between the system database initialization + /// and creation of new tables in the database. + startupSystemTables(); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -1668,7 +1698,6 @@ try /// Then, load remaining databases loadMetadata(global_context, default_database); convertDatabasesEnginesIfNeed(global_context); - startupSystemTables(); database_catalog.startupBackgroundCleanup(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); @@ -2045,6 +2074,9 @@ void Server::createServers( for (const auto & protocol : protocols) { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + std::string prefix = "protocols." + protocol + "."; std::string port_name = prefix + "port"; std::string description {" protocol"}; @@ -2054,9 +2086,6 @@ void Server::createServers( if (!config.has(prefix + "port")) continue; - if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name)) - continue; - std::vector hosts; if (config.has(prefix + "host")) hosts.push_back(config.getString(prefix + "host")); diff --git a/programs/server/config.d/clusters.xml b/programs/server/config.d/clusters.xml new file mode 120000 index 00000000000..97289377736 --- /dev/null +++ b/programs/server/config.d/clusters.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/clusters.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index 14b8954fc39..85cdda63558 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -317,7 +317,7 @@ 0 - 100 + 1000 query result using Cache = CacheBase; - /// query --> query execution count - using TimesExecuted = std::unordered_map; - public: /// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()). /// @@ -177,14 +173,14 @@ public: friend class QueryCache; /// for createReader() }; - QueryCache(); + QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); - void updateConfiguration(const Poco::Util::AbstractConfiguration & config); + void updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); Reader createReader(const Key & key); Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); - void reset(); + void clear(); size_t weight() const; size_t count() const; @@ -199,14 +195,15 @@ private: Cache cache; /// has its own locking --> not protected by mutex mutable std::mutex mutex; + + /// query --> query execution count + using TimesExecuted = std::unordered_map; TimesExecuted times_executed TSA_GUARDED_BY(mutex); /// Cache configuration size_t max_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0; size_t max_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0; - size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete - friend class StorageSystemQueryCache; }; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index d50289a5728..85597ce881a 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -44,11 +45,25 @@ void WriteBufferToFileSegment::nextImpl() size_t bytes_to_write = offset(); + FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write); + bool ok = file_segment->reserve(bytes_to_write, &reserve_stat); + if (!ok) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog()); + { + String reserve_stat_msg; + for (const auto & [kind, stat] : reserve_stat.stat_by_kind) + reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", + toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); + + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", + bytes_to_write, + file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", + reserve_stat_msg, + file_segment->getInfoForLog() + ); + } try { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 413ff1db6bc..6c09b327ca1 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -194,9 +194,9 @@ void ClientInfo::setInitialQuery() query_kind = QueryKind::INITIAL_QUERY; fillOSUserHostNameAndVersionInfo(); if (client_name.empty()) - client_name = DBMS_NAME; + client_name = VERSION_NAME; else - client_name = (DBMS_NAME " ") + client_name; + client_name = (VERSION_NAME " ") + client_name; } @@ -210,9 +210,9 @@ void ClientInfo::fillOSUserHostNameAndVersionInfo() client_hostname = getFQDNOrHostName(); - client_version_major = DBMS_VERSION_MAJOR; - client_version_minor = DBMS_VERSION_MINOR; - client_version_patch = DBMS_VERSION_PATCH; + client_version_major = VERSION_MAJOR; + client_version_minor = VERSION_MINOR; + client_version_patch = VERSION_PATCH; client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index b90acd1d576..cb75487cbbc 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -273,6 +273,8 @@ public: /// Are distributed DDL Queries (ON CLUSTER Clause) allowed for this cluster bool areDistributedDDLQueriesAllowed() const { return allow_distributed_ddl_queries; } + const String & getName() const { return name; } + private: SlotToShard slot_to_shard; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 953e38d56cd..da716d57f88 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -35,6 +36,11 @@ namespace ErrorCodes extern const int ALL_REPLICAS_ARE_STALE; } +namespace FailPoints +{ + extern const char use_delayed_remote_source[]; +} + namespace ClusterProxy { @@ -134,6 +140,12 @@ void SelectStreamFactory::createForShard( const auto & settings = context->getSettingsRef(); + fiu_do_on(FailPoints::use_delayed_remote_source, + { + emplace_remote_stream(/*lazy=*/true, /*local_delay=*/999999); + return; + }); + if (settings.prefer_localhost_replica && shard_info.isLocal()) { StoragePtr main_table_storage; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 1cc5a3b1a77..ca07fd5deda 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -60,9 +60,6 @@ public: /// (When there is a local replica with big delay). bool lazy = false; time_t local_delay = 0; - - /// Set only if parallel reading from replicas is used. - std::shared_ptr coordinator; }; using Shards = std::vector; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 2fed626ffb7..ccbce0b3dd4 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -29,7 +29,6 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int SUPPORT_IS_DISABLED; } namespace ClusterProxy @@ -235,7 +234,8 @@ void executeQuery( std::move(external_tables), log, shards, - query_info.storage_limits); + query_info.storage_limits, + query_info.getCluster()->getName()); read_from_remote->setStepDescription("Read from remote replica"); plan->addStep(std::move(read_from_remote)); @@ -267,21 +267,57 @@ void executeQueryWithParallelReplicas( const StorageID & main_table, const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, + const ASTPtr & query_ast, + ContextPtr context, + const SelectQueryInfo & query_info, const ClusterPtr & not_optimized_cluster) { - if (not_optimized_cluster->getShardsInfo().size() != 1) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Cluster for parallel replicas should consist only from one shard"); - - auto shard_info = not_optimized_cluster->getShardsInfo().front(); - const auto & settings = context->getSettingsRef(); - ClusterPtr new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); + auto new_context = Context::createCopy(context); + auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{}; + + UInt64 shard_num = 0; /// shard_num is 1-based, so 0 - no shard specified + const auto it = scalars.find("_shard_num"); + if (it != scalars.end()) + { + const Block & block = it->second; + const auto & column = block.safeGetByPosition(0).column; + shard_num = column->getUInt(0); + } + + size_t all_replicas_count = 0; + ClusterPtr new_cluster; + /// if got valid shard_num from query initiator, then parallel replicas scope is the specified shard + /// shards are numbered in order of appearance in the cluster config + if (shard_num > 0) + { + const auto shard_count = not_optimized_cluster->getShardCount(); + if (shard_num > shard_count) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Shard number is greater than shard count: shard_num={} shard_count={} cluster={}", + shard_num, + shard_count, + not_optimized_cluster->getName()); + + chassert(shard_count == not_optimized_cluster->getShardsAddresses().size()); + + LOG_DEBUG(&Poco::Logger::get("executeQueryWithParallelReplicas"), "Parallel replicas query in shard scope: shard_num={} cluster={}", + shard_num, not_optimized_cluster->getName()); + + const auto shard_replicas_num = not_optimized_cluster->getShardsAddresses()[shard_num - 1].size(); + all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), shard_replicas_num); + + /// shard_num is 1-based, but getClusterWithSingleShard expects 0-based index + new_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1); + } + else + { + new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); + all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); + } - auto all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); auto coordinator = std::make_shared(all_replicas_count); - auto remote_plan = std::make_unique(); - auto plans = std::vector(); /// This is a little bit weird, but we construct an "empty" coordinator without /// any specified reading/coordination method (like Default, InOrder, InReverseOrder) @@ -290,8 +326,6 @@ void executeQueryWithParallelReplicas( /// to then tell it about the reading method we chose. query_info.coordinator = coordinator; - auto new_context = Context::createCopy(context); - auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{}; auto external_tables = new_context->getExternalTables(); auto read_from_remote = std::make_unique( @@ -309,20 +343,7 @@ void executeQueryWithParallelReplicas( &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), query_info.storage_limits); - remote_plan->addStep(std::move(read_from_remote)); - remote_plan->addInterpreterContext(context); - plans.emplace_back(std::move(remote_plan)); - - if (std::all_of(plans.begin(), plans.end(), [](const QueryPlanPtr & plan) { return !plan; })) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No plans were generated for reading from shard. This is a bug"); - - DataStreams input_streams; - input_streams.reserve(plans.size()); - for (const auto & plan : plans) - input_streams.emplace_back(plan->getCurrentDataStream()); - - auto union_step = std::make_unique(std::move(input_streams)); - query_plan.unitePlans(std::move(union_step), std::move(plans)); + query_plan.addStep(std::move(read_from_remote)); } } diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 70543227b58..4fd90dad371 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -118,7 +118,7 @@ private: { size_t operator() (const IAST::Hash & hash) const { - return hash.first; + return hash.low64; } }; diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 1283879971d..3052c688e5f 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -33,9 +32,16 @@ class ConcurrentHashJoin : public IJoin { public: - explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); + explicit ConcurrentHashJoin( + ContextPtr context_, + std::shared_ptr table_join_, + size_t slots_, + const Block & right_sample_block, + bool any_take_last_row_ = false); + ~ConcurrentHashJoin() override = default; + std::string getName() const override { return "ConcurrentHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; @@ -66,7 +72,6 @@ private: IColumn::Selector selectDispatchBlock(const Strings & key_columns_names, const Block & from_block); Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); - }; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d1b88988e5e..6f25bc728a2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2,14 +2,10 @@ #include #include #include -#include #include -#include #include #include -#include #include -#include #include #include #include @@ -17,12 +13,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -34,8 +28,6 @@ #include #include #include -#include -#include #include #include #include @@ -44,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -69,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -86,8 +76,6 @@ #include #include #include -#include -#include #include #include #include @@ -97,14 +85,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -117,12 +103,8 @@ #include #include #include -#include #include -#if USE_ROCKSDB -#include -#endif namespace fs = std::filesystem; @@ -158,13 +140,13 @@ namespace CurrentMetrics extern const Metric IOWriterThreadsActive; } + namespace DB { namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int BAD_GET; extern const int UNKNOWN_DATABASE; extern const int UNKNOWN_TABLE; extern const int TABLE_ALREADY_EXISTS; @@ -179,6 +161,7 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CLUSTER_DOESNT_EXIST; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -232,6 +215,7 @@ struct ContextSharedPart : boost::noncopyable String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. String user_scripts_path; /// Path to the directory with user provided scripts. + String filesystem_caches_path; /// Path to the directory with filesystem caches. ConfigurationPtr config; /// Global configuration settings. String tmp_path; /// Path to the temporary files that occur when processing the request. @@ -262,26 +246,27 @@ struct ContextSharedPart : boost::noncopyable std::optional backups_worker; - String default_profile_name; /// Default profile name used for default values. - String system_profile_name; /// Profile used by system processes - String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String default_profile_name; /// Default profile name used for default values. + String system_profile_name; /// Profile used by system processes + String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying std::unique_ptr access_control; mutable ResourceManagerPtr resource_manager; - mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. - mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. - mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. - mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. - mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. - mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. - mutable QueryCachePtr query_cache; /// Cache of query results. - mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. - ProcessList process_list; /// Executing queries at the moment. + mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. + mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. + mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. + mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. + mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. + mutable QueryCachePtr query_cache; /// Cache of query results. + mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. + mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. + ProcessList process_list; /// Executing queries at the moment. + SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; - MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) - MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) + MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) + MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; - ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. - InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. + ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. + InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) @@ -563,7 +548,7 @@ struct ContextSharedPart : boost::noncopyable */ #if USE_EMBEDDED_COMPILER if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) - cache->reset(); + cache->clear(); #endif /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). @@ -739,6 +724,9 @@ std::unique_lock Context::getLock() const ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } + +SessionTracker & Context::getSessionTracker() { return shared->session_tracker; } + MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } MovesList & Context::getMovesList() { return shared->moves_list; } @@ -784,6 +772,12 @@ String Context::getUserScriptsPath() const return shared->user_scripts_path; } +String Context::getFilesystemCachesPath() const +{ + auto lock = getLock(); + return shared->filesystem_caches_path; +} + Strings Context::getWarnings() const { Strings common_warnings; @@ -875,6 +869,16 @@ void Context::setPath(const String & path) shared->user_scripts_path = shared->path + "user_scripts/"; } +void Context::setFilesystemCachesPath(const String & path) +{ + auto lock = getLock(); + + if (!fs::path(path).is_absolute()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem caches path must be absolute: {}", path); + + shared->filesystem_caches_path = path; +} + static void setupTmpPath(Poco::Logger * log, const std::string & path) try { @@ -1083,52 +1087,32 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_) +void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) { /// Prepare lists of user's profiles, constraints, settings, roles. + /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, + /// so Context::getLock() must be unlocked while we're doing this. - std::shared_ptr user; - std::shared_ptr temp_access; - if (set_current_profiles_ || set_current_roles_ || set_current_database_) - { - std::optional params; - { - auto lock = getLock(); - params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info}); - } - /// `temp_access` is used here only to extract information about the user, not to actually check access. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. - temp_access = getAccessControl().getContextAccess(*params); - user = temp_access->getUser(); - } + auto user = getAccessControl().read(user_id_); - std::shared_ptr profiles; - if (set_current_profiles_) - profiles = temp_access->getDefaultProfileInfo(); - - std::optional> roles; - if (set_current_roles_) - roles = user->granted_roles.findGranted(user->default_roles); - - String database; - if (set_current_database_) - database = user->default_database; + auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = getAccessControl().getEnabledRolesInfo(new_current_roles, {}); + auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); + const auto & database = user->default_database; /// Apply user's profiles, constraints, settings, roles. + auto lock = getLock(); setUserID(user_id_); - if (profiles) - { - /// A profile can specify a value and a readonly constraint for same setting at the same time, - /// so we shouldn't check constraints here. - setCurrentProfiles(*profiles, /* check_constraints= */ false); - } + /// A profile can specify a value and a readonly constraint for same setting at the same time, + /// so we shouldn't check constraints here. + setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false); - if (roles) - setCurrentRoles(*roles); + setCurrentRoles(new_current_roles); + /// It's optional to specify the DEFAULT DATABASE in the user's definition. if (!database.empty()) setCurrentDatabase(database); } @@ -1157,13 +1141,6 @@ std::optional Context::getUserID() const } -void Context::setQuotaKey(String quota_key_) -{ - auto lock = getLock(); - client_info.quota_key = std::move(quota_key_); -} - - void Context::setCurrentRoles(const std::vector & current_roles_) { auto lock = getLock(); @@ -1303,7 +1280,7 @@ void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, boo { auto lock = getLock(); if (check_constraints) - checkSettingsConstraints(profiles_info.settings); + checkSettingsConstraints(profiles_info.settings, SettingSource::PROFILE); applySettingsChanges(profiles_info.settings); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); } @@ -1601,7 +1578,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } } auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) { @@ -1752,7 +1729,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const auto new_hash = table_expression->getTreeHash(); if (hash != new_hash) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); + key = toString(new_hash); table_function_results[key] = res; } } @@ -1761,8 +1738,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = table_expression->getTreeHash(); + const auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) @@ -1857,29 +1834,29 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } -void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements, source); } -void Context::checkSettingsConstraints(const SettingChange & change) const +void Context::checkSettingsConstraints(const SettingChange & change, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change, source); } -void Context::checkSettingsConstraints(const SettingsChanges & changes) const +void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::checkSettingsConstraints(SettingsChanges & changes) const +void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::clampToSettingsConstraints(SettingsChanges & changes) const +void Context::clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes, source); } void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -2301,6 +2278,16 @@ void Context::setUncompressedCache(const String & uncompressed_cache_policy, siz shared->uncompressed_cache = std::make_shared(uncompressed_cache_policy, max_size_in_bytes); } +void Context::updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); + shared->uncompressed_cache->setMaxSize(max_size_in_bytes); +} UncompressedCachePtr Context::getUncompressedCache() const { @@ -2308,14 +2295,13 @@ UncompressedCachePtr Context::getUncompressedCache() const return shared->uncompressed_cache; } - -void Context::dropUncompressedCache() const +void Context::clearUncompressedCache() const { auto lock = getLock(); - if (shared->uncompressed_cache) - shared->uncompressed_cache->reset(); -} + if (shared->uncompressed_cache) + shared->uncompressed_cache->clear(); +} void Context::setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes) { @@ -2327,17 +2313,29 @@ void Context::setMarkCache(const String & mark_cache_policy, size_t cache_size_i shared->mark_cache = std::make_shared(mark_cache_policy, cache_size_in_bytes); } +void Context::updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + shared->mark_cache->setMaxSize(max_size_in_bytes); +} + MarkCachePtr Context::getMarkCache() const { auto lock = getLock(); return shared->mark_cache; } -void Context::dropMarkCache() const +void Context::clearMarkCache() const { auto lock = getLock(); + if (shared->mark_cache) - shared->mark_cache->reset(); + shared->mark_cache->clear(); } ThreadPool & Context::getLoadMarksThreadpool() const @@ -2355,15 +2353,174 @@ ThreadPool & Context::getLoadMarksThreadpool() const return *shared->load_marks_threadpool; } -static size_t getPrefetchThreadpoolSizeFromConfig(const Poco::Util::AbstractConfiguration & config) +void Context::setIndexUncompressedCache(size_t max_size_in_bytes) { - return config.getUInt(".prefetch_threadpool_pool_size", 100); + auto lock = getLock(); + + if (shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); + + shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); } -size_t Context::getPrefetchThreadpoolSize() const +void Context::updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - const auto & config = getConfigRef(); - return getPrefetchThreadpoolSizeFromConfig(config); + auto lock = getLock(); + + if (!shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); + shared->index_uncompressed_cache->setMaxSize(max_size_in_bytes); +} + +UncompressedCachePtr Context::getIndexUncompressedCache() const +{ + auto lock = getLock(); + return shared->index_uncompressed_cache; +} + +void Context::clearIndexUncompressedCache() const +{ + auto lock = getLock(); + + if (shared->index_uncompressed_cache) + shared->index_uncompressed_cache->clear(); +} + +void Context::setIndexMarkCache(size_t cache_size_in_bytes) +{ + auto lock = getLock(); + + if (shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); + + shared->index_mark_cache = std::make_shared(cache_size_in_bytes); +} + +void Context::updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); + shared->index_mark_cache->setMaxSize(max_size_in_bytes); +} + +MarkCachePtr Context::getIndexMarkCache() const +{ + auto lock = getLock(); + return shared->index_mark_cache; +} + +void Context::clearIndexMarkCache() const +{ + auto lock = getLock(); + + if (shared->index_mark_cache) + shared->index_mark_cache->clear(); +} + +void Context::setMMappedFileCache(size_t cache_size_in_num_entries) +{ + auto lock = getLock(); + + if (shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); + + shared->mmap_cache = std::make_shared(cache_size_in_num_entries); +} + +void Context::updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + shared->mmap_cache->setMaxSize(max_size_in_bytes); +} + +MMappedFileCachePtr Context::getMMappedFileCache() const +{ + auto lock = getLock(); + return shared->mmap_cache; +} + +void Context::clearMMappedFileCache() const +{ + auto lock = getLock(); + + if (shared->mmap_cache) + shared->mmap_cache->clear(); +} + +void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows) +{ + auto lock = getLock(); + + if (shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); + + shared->query_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); +} + +void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (!shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + shared->query_cache->updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); +} + +QueryCachePtr Context::getQueryCache() const +{ + auto lock = getLock(); + return shared->query_cache; +} + +void Context::clearQueryCache() const +{ + auto lock = getLock(); + + if (shared->query_cache) + shared->query_cache->clear(); +} + +void Context::clearCaches() const +{ + auto lock = getLock(); + + if (!shared->uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); + shared->uncompressed_cache->clear(); + + if (!shared->mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); + shared->mark_cache->clear(); + + if (!shared->index_uncompressed_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); + shared->index_uncompressed_cache->clear(); + + if (!shared->index_mark_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet."); + shared->index_mark_cache->clear(); + + if (!shared->mmap_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mmapped file cache was not created yet."); + shared->mmap_cache->clear(); + + /// Intentionally not clearing the query cache which is transactionally inconsistent by design. } ThreadPool & Context::getPrefetchThreadpool() const @@ -2381,131 +2538,10 @@ ThreadPool & Context::getPrefetchThreadpool() const return *shared->prefetch_threadpool; } -void Context::setIndexUncompressedCache(size_t max_size_in_bytes) +size_t Context::getPrefetchThreadpoolSize() const { - auto lock = getLock(); - - if (shared->index_uncompressed_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); - - shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); -} - - -UncompressedCachePtr Context::getIndexUncompressedCache() const -{ - auto lock = getLock(); - return shared->index_uncompressed_cache; -} - - -void Context::dropIndexUncompressedCache() const -{ - auto lock = getLock(); - if (shared->index_uncompressed_cache) - shared->index_uncompressed_cache->reset(); -} - - -void Context::setIndexMarkCache(size_t cache_size_in_bytes) -{ - auto lock = getLock(); - - if (shared->index_mark_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); - - shared->index_mark_cache = std::make_shared(cache_size_in_bytes); -} - -MarkCachePtr Context::getIndexMarkCache() const -{ - auto lock = getLock(); - return shared->index_mark_cache; -} - -void Context::dropIndexMarkCache() const -{ - auto lock = getLock(); - if (shared->index_mark_cache) - shared->index_mark_cache->reset(); -} - -void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - - if (shared->query_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); - - shared->query_cache = std::make_shared(); - shared->query_cache->updateConfiguration(config); -} - -void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - if (shared->query_cache) - shared->query_cache->updateConfiguration(config); -} - -QueryCachePtr Context::getQueryCache() const -{ - auto lock = getLock(); - return shared->query_cache; -} - -void Context::dropQueryCache() const -{ - auto lock = getLock(); - if (shared->query_cache) - shared->query_cache->reset(); -} - -void Context::setMMappedFileCache(size_t cache_size_in_num_entries) -{ - auto lock = getLock(); - - if (shared->mmap_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); - - shared->mmap_cache = std::make_shared(cache_size_in_num_entries); -} - -MMappedFileCachePtr Context::getMMappedFileCache() const -{ - auto lock = getLock(); - return shared->mmap_cache; -} - -void Context::dropMMappedFileCache() const -{ - auto lock = getLock(); - if (shared->mmap_cache) - shared->mmap_cache->reset(); -} - - -void Context::dropCaches() const -{ - auto lock = getLock(); - - if (shared->uncompressed_cache) - shared->uncompressed_cache->reset(); - - if (shared->mark_cache) - shared->mark_cache->reset(); - - if (shared->index_uncompressed_cache) - shared->index_uncompressed_cache->reset(); - - if (shared->index_mark_cache) - shared->index_mark_cache->reset(); - - if (shared->query_cache) - shared->query_cache->reset(); - - if (shared->mmap_cache) - shared->mmap_cache->reset(); + const auto & config = getConfigRef(); + return config.getUInt(".prefetch_threadpool_pool_size", 100); } BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const @@ -2711,7 +2747,10 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? *shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) shared->zookeeper = std::make_shared(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); - else if (shared->zookeeper->expired()) + else if (shared->zookeeper->hasReachedDeadline()) + shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); + + if (shared->zookeeper->expired()) { Stopwatch watch; LOG_DEBUG(shared->log, "Trying to establish a new connection with ZooKeeper"); @@ -3071,7 +3110,7 @@ UInt16 Context::getServerPort(const String & port_name) const { auto it = shared->server_ports.find(port_name); if (it == shared->server_ports.end()) - throw Exception(ErrorCodes::BAD_GET, "There is no port named {}", port_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "There is no port named {}", port_name); else return it->second; } @@ -3080,7 +3119,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); } @@ -3200,7 +3239,12 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrsystem_logs = std::make_unique(getGlobalContext(), getConfigRef()); } @@ -3401,6 +3445,16 @@ std::shared_ptr Context::getAsynchronousInsertLog() const return shared->system_logs->asynchronous_insert_log; } +std::vector Context::getSystemLogs() const +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->logs; +} + CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { auto lock = getLock(); @@ -4548,14 +4602,6 @@ ReadSettings Context::getReadSettings() const return res; } -ReadSettings Context::getBackupReadSettings() const -{ - ReadSettings read_settings = getReadSettings(); - read_settings.remote_throttler = getBackupsThrottler(); - read_settings.local_throttler = getBackupsThrottler(); - return read_settings; -} - WriteSettings Context::getWriteSettings() const { WriteSettings res; @@ -4628,4 +4674,9 @@ void Context::setClientProtocolVersion(UInt64 version) client_protocol_version = version; } +const ServerSettings & Context::getServerSettings() const +{ + return shared->server_settings; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 0d567816ec9..471dfb7c1f7 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ struct Progress; struct FileProgress; class Clusters; class QueryCache; +class ISystemLog; class QueryLog; class QueryThreadLog; class QueryViewsLog; @@ -202,6 +204,10 @@ using MergeTreeMetadataCachePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; +class SessionTracker; + +struct ServerSettings; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -485,6 +491,7 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; String getUserScriptsPath() const; + String getFilesystemCachesPath() const; /// A list of warnings about server configuration to place in `system.warnings` table. Strings getWarnings() const; @@ -495,6 +502,8 @@ public: TemporaryDataOnDiskScopePtr getSharedTempDataOnDisk() const; void setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_); + void setFilesystemCachesPath(const String & path); + void setPath(const String & path); void setFlagsPath(const String & path); void setUserFilesPath(const String & path); @@ -531,16 +540,12 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_, bool set_current_profiles_ = true, bool set_current_roles_ = true, bool set_current_database_ = true); + void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); UserPtr getUser() const; - void setUserID(const UUID & user_id_); std::optional getUserID() const; - String getUserName() const; - void setQuotaKey(String quota_key_); - void setCurrentRoles(const std::vector & current_roles_); void setCurrentRolesDefault(); boost::container::flat_set getCurrentRoles() const; @@ -735,11 +740,11 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. - void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; - void checkSettingsConstraints(const SettingChange & change) const; - void checkSettingsConstraints(const SettingsChanges & changes) const; - void checkSettingsConstraints(SettingsChanges & changes) const; - void clampToSettingsConstraints(SettingsChanges & changes) const; + void checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const; + void checkSettingsConstraints(const SettingChange & change, SettingSource source) const; + void checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const; + void checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const; + void clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const; void checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const; /// Reset settings to default value @@ -861,6 +866,8 @@ public: OvercommitTracker * getGlobalOvercommitTracker() const; + SessionTracker & getSessionTracker(); + MergeList & getMergeList(); const MergeList & getMergeList() const; @@ -913,44 +920,38 @@ public: void setSystemZooKeeperLogAfterInitializationIfNeeded(); - /// Create a cache of uncompressed blocks of specified size. This can be done only once. - void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes); - std::shared_ptr getUncompressedCache() const; - void dropUncompressedCache() const; + /// --- Caches ------------------------------------------------------------------------------------------ + + void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes); + void updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config); + std::shared_ptr getUncompressedCache() const; + void clearUncompressedCache() const; - /// Create a cache of marks of specified size. This can be done only once. void setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes); + void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMarkCache() const; - void dropMarkCache() const; + void clearMarkCache() const; ThreadPool & getLoadMarksThreadpool() const; - ThreadPool & getPrefetchThreadpool() const; - - /// Note: prefetchThreadpool is different from threadpoolReader - /// in the way that its tasks are - wait for marks to be loaded - /// and make a prefetch by putting a read task to threadpoolReader. - size_t getPrefetchThreadpoolSize() const; - - /// Create a cache of index uncompressed blocks of specified size. This can be done only once. void setIndexUncompressedCache(size_t max_size_in_bytes); + void updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getIndexUncompressedCache() const; - void dropIndexUncompressedCache() const; + void clearIndexUncompressedCache() const; - /// Create a cache of index marks of specified size. This can be done only once. void setIndexMarkCache(size_t cache_size_in_bytes); + void updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getIndexMarkCache() const; - void dropIndexMarkCache() const; + void clearIndexMarkCache() const; - /// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads. void setMMappedFileCache(size_t cache_size_in_num_entries); + void updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMMappedFileCache() const; - void dropMMappedFileCache() const; + void clearMMappedFileCache() const; - /// Create a cache of query results for statements which run repeatedly. - void setQueryCache(const Poco::Util::AbstractConfiguration & config); + void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows); void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; - void dropQueryCache() const; + void clearQueryCache() const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. @@ -958,7 +959,16 @@ public: * (when deleting a table - it is necessary, since in its place another can appear) * const - because the change in the cache is not considered significant. */ - void dropCaches() const; + void clearCaches() const; + + /// ----------------------------------------------------------------------------------------------------- + + ThreadPool & getPrefetchThreadpool() const; + + /// Note: prefetchThreadpool is different from threadpoolReader + /// in the way that its tasks are - wait for marks to be loaded + /// and make a prefetch by putting a read task to threadpoolReader. + size_t getPrefetchThreadpoolSize() const; /// Settings for MergeTree background tasks stored in config.xml BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const; @@ -1019,6 +1029,8 @@ public: std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; + std::vector getSystemLogs() const; + /// Returns an object used to log operations with parts if it possible. /// Provide table name to make required checks. std::shared_ptr getPartLog(const String & part_database) const; @@ -1165,9 +1177,6 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; - /** Get settings for reading from filesystem for BACKUPs. */ - ReadSettings getBackupReadSettings() const; - /** Get settings for writing to filesystem. */ WriteSettings getWriteSettings() const; @@ -1187,11 +1196,15 @@ public: void setPreparedSetsCache(const PreparedSetsCachePtr & cache); PreparedSetsCachePtr getPreparedSetsCache() const; + const ServerSettings & getServerSettings() const; + private: std::unique_lock getLock() const; void initGlobal(); + void setUserID(const UUID & user_id_); + template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 92e6bcb326c..8be334d6223 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -574,7 +574,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS) { chassert(Coordination::isHardwareError(create_active_res)); - throw Coordination::Exception(create_active_res, active_node_path); + throw Coordination::Exception::fromPath(create_active_res, active_node_path); } /// Status dirs were not created in enqueueQuery(...) or someone is removing entry diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 13cac5afb1b..dad455d487b 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +25,7 @@ #include #include +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_MYSQL @@ -35,7 +38,6 @@ # include #endif - namespace CurrentMetrics { extern const Metric TablesToDropQueueSize; @@ -59,6 +61,29 @@ namespace ErrorCodes extern const int UNFINISHED; } +class DatabaseNameHints : public IHints<1, DatabaseNameHints> +{ +public: + explicit DatabaseNameHints(const DatabaseCatalog & database_catalog_) + : database_catalog(database_catalog_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + auto databases_list = database_catalog.getDatabases(); + for (const auto & database_name : databases_list | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; + result.emplace_back(database_name); + } + return result; + } +private: + const DatabaseCatalog & database_catalog; +}; + TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) : WithContext(context_->getGlobalContext()) , temporary_tables(DatabaseCatalog::instance().getDatabaseForTemporaryTables().get()) @@ -313,7 +338,14 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( { assert(!db_and_table.first && !db_and_table.second); if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + { + TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (names.empty()) + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + else + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } return {}; } @@ -336,7 +368,6 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return db_and_table; } - if (table_id.database_name == TEMPORARY_DATABASE) { /// For temporary tables UUIDs are set in Context::resolveStorageID(...). @@ -360,19 +391,58 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( std::lock_guard lock{databases_mutex}; auto it = databases.find(table_id.getDatabaseName()); - if (databases.end() == it) - { - if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()))); - return {}; - } - database = it->second; + if (databases.end() != it) + database = it->second; } - auto table = database->tryGetTable(table_id.table_name, context_); - if (!table && exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + if (!database) + { + if (exception) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(table_id.getDatabaseName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(table_id.getDatabaseName()))); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(table_id.getDatabaseName()), backQuoteIfNeed(names[0]))); + } + } + return {}; + } + StoragePtr table; + if (exception) + { + try + { + table = database->getTable(table_id.table_name, context_); + } + catch (const Exception & e) + { + exception->emplace(e); + } + } + else + { + table = database->tryGetTable(table_id.table_name, context_); + } + + if (!table && exception && !exception->has_value()) + { + TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } + } if (!table) database = nullptr; @@ -423,8 +493,26 @@ bool DatabaseCatalog::isPredefinedTable(const StorageID & table_id) const void DatabaseCatalog::assertDatabaseExists(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + assert(!database_name.empty()); + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } } void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) const @@ -433,19 +521,11 @@ void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) co assertDatabaseDoesntExistUnlocked(database_name); } -void DatabaseCatalog::assertDatabaseExistsUnlocked(const String & database_name) const -{ - assert(!database_name.empty()); - if (databases.end() == databases.find(database_name)) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(database_name)); -} - - void DatabaseCatalog::assertDatabaseDoesntExistUnlocked(const String & database_name) const { assert(!database_name.empty()); if (databases.end() != databases.find(database_name)) - throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", backQuoteIfNeed(database_name)); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists", backQuoteIfNeed(database_name)); } void DatabaseCatalog::attachDatabase(const String & database_name, const DatabasePtr & database) @@ -465,18 +545,34 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri { if (database_name == TEMPORARY_DATABASE) throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot detach database with temporary tables."); - + assert(!database_name.empty()); DatabasePtr db; { std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - db = databases.find(database_name)->second; - UUID db_uuid = db->getUUID(); - if (db_uuid != UUIDHelpers::Nil) - removeUUIDMapping(db_uuid); - databases.erase(database_name); - } + if (auto it = databases.find(database_name); it != databases.end()) + { + db = it->second; + UUID db_uuid = db->getUUID(); + if (db_uuid != UUIDHelpers::Nil) + removeUUIDMapping(db_uuid); + databases.erase(database_name); + + } + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } if (check_empty) { try @@ -512,7 +608,6 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri if (db_uuid != UUIDHelpers::Nil) removeUUIDMappingFinally(db_uuid); } - return db; } @@ -538,9 +633,28 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String & DatabasePtr DatabaseCatalog::getDatabase(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - return databases.find(database_name)->second; + assert(!database_name.empty()); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } + return db; } DatabasePtr DatabaseCatalog::tryGetDatabase(const String & database_name) const diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 805d7786569..50a6ef437bf 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -6,7 +6,10 @@ #include #include #include +#include "Common/NamePrompter.h" #include +#include "Storages/IStorage.h" +#include "Databases/IDatabase.h" #include #include @@ -27,6 +30,32 @@ namespace fs = std::filesystem; namespace DB { +class TableNameHints : public IHints<1, TableNameHints> +{ +public: + TableNameHints(ConstDatabasePtr database_, ContextPtr context_) + : context(context_), + database(database_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + if (database) + { + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + const auto & storage_id = table_it->table()->getStorageID(); + result.emplace_back(storage_id.getTableName()); + } + } + return result; + } +private: + ContextPtr context; + ConstDatabasePtr database; +}; + class IDatabase; class Exception; class ColumnsDescription; @@ -262,7 +291,6 @@ private: static std::unique_ptr database_catalog; explicit DatabaseCatalog(ContextMutablePtr global_context_); - void assertDatabaseExistsUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void assertDatabaseDoesntExistUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void shutdownImpl(); @@ -280,7 +308,7 @@ private: static inline size_t getFirstLevelIdx(const UUID & uuid) { - return uuid.toUnderType().items[0] >> (64 - bits_for_first_level); + return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } void dropTableDataTask(); diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index e55ac278705..5f664314818 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -30,6 +30,7 @@ public: std::shared_ptr storage_, const Block & right_sample_block_with_storage_column_names_); + std::string getName() const override { return "DirectKeyValueJoin"; } virtual const TableJoin & getTableJoin() const override { return *table_join; } virtual bool addBlockToJoin(const Block &, bool) override; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3d821b60e81..ded99df9f74 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -98,7 +98,7 @@ static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqu void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { auto hash = subquery.getTreeHash(); - auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + const auto scalar_query_hash_str = toString(hash); std::unique_ptr interpreter = nullptr; bool hit = false; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9aee61eb8f0..acedede579b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1130,9 +1130,17 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( if (auto storage = analyzed_join->getStorageJoin()) { + auto joined_block_actions = analyzed_join->createJoinedBlockActions(getContext()); + NamesWithAliases required_columns_with_aliases = analyzed_join->getRequiredColumns( + Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + + Names original_right_column_names; + for (auto & pr : required_columns_with_aliases) + original_right_column_names.push_back(pr.first); + auto right_columns = storage->getRightSampleBlock().getColumnsWithTypeAndName(); std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns); - return storage->getJoinLocked(analyzed_join, getContext()); + return storage->getJoinLocked(analyzed_join, getContext(), original_right_column_names); } joined_plan = buildJoinedPlan(getContext(), join_element, *analyzed_join, query_options); diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 17f0fda71ec..b660db064d1 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,6 +40,8 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, + {"key", std::make_shared()}, + {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -60,6 +62,8 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); + columns[i++]->insert(file_segment_key); + columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 1b22d561c51..0d088a922e0 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// | remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType @@ -39,6 +30,8 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; + std::string file_segment_key; + size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index a6b53a51c04..3fc9f8920ed 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -28,6 +28,7 @@ public: LOG_TRACE(&Poco::Logger::get("FullSortingMergeJoin"), "Will use full sorting merge join"); } + std::string getName() const override { return "FullSortingMergeJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5b633fee9b6..08378c3158b 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -115,7 +115,7 @@ public: if (alias.empty()) { auto hash = subquery_or_table_name->getTreeHash(); - external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); + external_table_name = fmt::format("_data_{}", toString(hash)); } else external_table_name = alias; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5d72cf20740..89ea3a326cc 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -1,15 +1,11 @@ #include #include #include -#include #include #include #include -#include -#include -#include #include #include @@ -18,6 +14,9 @@ #include +#include + + namespace CurrentMetrics { extern const Metric TemporaryFilesForJoin; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index ce519892b0e..44949440467 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -60,6 +60,7 @@ public: ~GraceHashJoin() override; + std::string getName() const override { return "GraceHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } void initialize(const Block & sample_block) override; diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 56dea98c1f1..9f55945816c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -151,6 +151,7 @@ public: ~HashJoin() override; + std::string getName() const override { return "HashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 97b119bd795..493a5dd2126 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -48,6 +48,8 @@ class IJoin public: virtual ~IJoin() = default; + virtual std::string getName() const = 0; + virtual const TableJoin & getTableJoin() const = 0; /// Add block of data from right hand of JOIN. diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index e82415f1aca..7d1f9948238 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -132,6 +132,21 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", table->getStorageID().getNameForLogs()); + if (mut_command->type == MutationCommand::UPDATE || mut_command->type == MutationCommand::DELETE) + { + /// TODO: add a check for result query size. + auto rewritten_command_ast = replaceNonDeterministicToScalars(*command_ast, getContext()); + if (rewritten_command_ast) + { + auto * new_alter_command = rewritten_command_ast->as(); + mut_command = MutationCommand::parse(new_alter_command); + if (!mut_command) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Alter command '{}' is rewritten to invalid command '{}'", + queryToString(*command_ast), queryToString(*rewritten_command_ast)); + } + } + mutation_commands.emplace_back(std::move(*mut_command)); } else diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index d56b5029e41..3e87f4fe440 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -18,7 +19,8 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { - ASTCreateFunctionQuery & create_function_query = query_ptr->as(); + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); AccessRightsElements access_rights_elements; access_rights_elements.emplace_back(AccessType::CREATE_FUNCTION); @@ -35,7 +37,7 @@ BlockIO InterpreterCreateFunctionQuery::execute() DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query_ptr, current_context, params); } current_context->checkAccess(access_rights_elements); @@ -44,7 +46,7 @@ BlockIO InterpreterCreateFunctionQuery::execute() bool throw_if_exists = !create_function_query.if_not_exists && !create_function_query.or_replace; bool replace_if_exists = create_function_query.or_replace; - UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, query_ptr, throw_if_exists, replace_if_exists); + UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, updated_query_ptr, throw_if_exists, replace_if_exists); return {}; } diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 752bc6200ce..3b47a002e50 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int INCORRECT_QUERY; + extern const int NOT_IMPLEMENTED; } @@ -24,6 +25,15 @@ BlockIO InterpreterCreateIndexQuery::execute() auto current_context = getContext(); const auto & create_index = query_ptr->as(); + if (create_index.unique) + { + if (!current_context->getSettingsRef().create_index_ignore_unique) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CREATE UNIQUE INDEX is not supported." + " SET create_index_ignore_unique=1 to ignore this UNIQUE keyword."); + } + + } // Noop if allow_create_index_without_type = true. throw otherwise if (!create_index.index_decl->as()->type) { diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index bac59998062..3b0fba5fd9f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -1,5 +1,4 @@ #include - #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 745dda34828..92d74f4f18a 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include -#include #include #include @@ -704,6 +704,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index"); + if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index"); + properties.indices.push_back(index_desc); } if (create.columns_list->projections) @@ -764,7 +767,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// Table function without columns list. auto table_function_ast = create.as_table_function->ptr(); auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext()); - properties.columns = table_function->getActualTableStructure(getContext()); + properties.columns = table_function->getActualTableStructure(getContext(), /*is_insert_query*/ true); } else if (create.is_dictionary) { @@ -1329,10 +1332,32 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } data_path = database->getTableDataPath(create); + auto full_data_path = fs::path{getContext()->getPath()} / data_path; - if (!create.attach && !data_path.empty() && fs::exists(fs::path{getContext()->getPath()} / data_path)) - throw Exception(storage_already_exists_error_code, - "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + if (!create.attach && !data_path.empty() && fs::exists(full_data_path)) + { + if (getContext()->getZooKeeperMetadataTransaction() && + !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery() && + !DatabaseCatalog::instance().hasUUIDMapping(create.uuid) && + Context::getGlobalContextInstance()->isServerCompletelyStarted() && + Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_directory_to_trash", false)) + { + /// This is a secondary query from a Replicated database. It cannot be retried with another UUID, we must execute it as is. + /// We don't have a table with this UUID (and all metadata is loaded), + /// so the existing directory probably contains some leftovers from previous unsuccessful attempts to create the table + + fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng()); + LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Directory for {} data {} already exists. Will move it to {}", + Poco::toLower(storage_name), String(data_path), trash_path); + fs::create_directories(trash_path.parent_path()); + renameNoReplace(full_data_path, trash_path); + } + else + { + throw Exception(storage_already_exists_error_code, + "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + } + } bool from_path = create.attach_from_path.has_value(); String actual_data_path = data_path; diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index 7822ecdb8be..f9c61afb4d8 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -25,7 +25,6 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "current_size"}, ColumnWithTypeAndName{std::make_shared(), "current_elements"}, ColumnWithTypeAndName{std::make_shared(), "path"}, - ColumnWithTypeAndName{std::make_shared>(), "delayed_cleanup_interval_ms"}, ColumnWithTypeAndName{std::make_shared>(), "background_download_threads"}, ColumnWithTypeAndName{std::make_shared>(), "enable_bypass_cache_with_threshold"}, }; @@ -54,7 +53,6 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(cache->getUsedCacheSize()); res_columns[i++]->insert(cache->getFileSegmentsNum()); res_columns[i++]->insert(cache->getBasePath()); - res_columns[i++]->insert(settings.delayed_cleanup_interval_ms); res_columns[i++]->insert(settings.background_download_threads); res_columns[i++]->insert(settings.enable_bypass_cache_with_threashold); diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 0b9eea86b46..26d9e5254f3 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -96,7 +96,7 @@ BlockIO InterpreterDescribeQuery::execute() else if (table_expression.table_function) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext()); - auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext()); + auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & table_function_column_description : table_function_column_descriptions) columns.emplace_back(table_function_column_description); } diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index df81ae661c7..af60d9c5df7 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,12 +1,13 @@ -#include +#include #include #include #include #include #include -#include #include +#include +#include namespace DB @@ -20,7 +21,9 @@ namespace ErrorCodes BlockIO InterpreterDropFunctionQuery::execute() { FunctionNameNormalizer().visit(query_ptr.get()); - ASTDropFunctionQuery & drop_function_query = query_ptr->as(); + + const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + ASTDropFunctionQuery & drop_function_query = updated_query_ptr->as(); AccessRightsElements access_rights_elements; access_rights_elements.emplace_back(AccessType::DROP_FUNCTION); @@ -34,7 +37,7 @@ BlockIO InterpreterDropFunctionQuery::execute() DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query_ptr, current_context, params); } current_context->checkAccess(access_rights_elements); diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cc3444bb4df..fe49b1cfd7c 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -22,11 +22,7 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - if (query.if_exists) - NamedCollectionUtils::removeIfExistsFromSQL(query.collection_name, current_context); - else - NamedCollectionUtils::removeFromSQL(query.collection_name, current_context); - + NamedCollectionUtils::removeFromSQL(query, current_context); return {}; } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index ed927d550a8..f746732ca9d 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -247,10 +247,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue DatabaseCatalog::instance().removeDependencies(table_id, check_ref_deps, check_loading_deps, is_drop_or_detach_database); database->dropTable(context_, table_id.table_name, query.sync); - /// We have to drop mmapio cache when dropping table from Ordinary database + /// We have to clear mmapio cache when dropping table from Ordinary database /// to avoid reading old data if new table with the same name is created if (database->getUUID() == UUIDHelpers::Nil) - context_->dropMMappedFileCache(); + context_->clearMMappedFileCache(); } db = database; @@ -329,13 +329,12 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, database = tryGetDatabase(database_name, query.if_exists); if (database) { - if (query.kind == ASTDropQuery::Kind::Truncate) - { - throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to truncate database"); - } - else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) + if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop + || query.kind == ASTDropQuery::Kind::Truncate) { bool drop = query.kind == ASTDropQuery::Kind::Drop; + bool truncate = query.kind == ASTDropQuery::Kind::Truncate; + getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) @@ -348,6 +347,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, { ASTDropQuery query_for_table; query_for_table.kind = query.kind; + // For truncate operation on database, drop the tables + if (truncate) + query_for_table.kind = ASTDropQuery::Kind::Drop; query_for_table.if_exists = true; query_for_table.setDatabase(database_name); query_for_table.sync = query.sync; @@ -375,8 +377,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, uuids_to_wait.push_back(table_to_wait); } } - - if (!drop && query.sync) + // only if operation is DETACH + if ((!drop || !truncate) && query.sync) { /// Avoid "some tables are still in use" when sync mode is enabled for (const auto & table_uuid : uuids_to_wait) @@ -385,12 +387,13 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, /// Protects from concurrent CREATE TABLE queries auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name); - - if (!drop) + // only if operation is DETACH + if (!drop || !truncate) database->assertCanBeDetached(true); - /// DETACH or DROP database itself - DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); + /// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database. + if (!truncate) + DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); } } @@ -432,8 +435,9 @@ AccessRightsElements InterpreterDropQuery::getRequiredAccessForDDLOnCluster() co } void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting) + const StorageID & target_table_id, bool sync, bool ignore_sync_setting, bool need_ddl_guard) { + auto ddl_guard = (need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(target_table_id.database_name, target_table_id.table_name) : nullptr); if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context)) { /// We create and execute `drop` query for internal table. diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index af7a4ddef25..7ae544a7356 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -25,7 +25,7 @@ public: BlockIO execute() override; static void executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false); + const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false, bool need_ddl_guard = false); bool supportsTransactions() const override; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3a381cd8dab..39cc4df5c2d 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -541,13 +541,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); interpreter.buildQueryPlan(plan); context = interpreter.getContext(); - // collect the selected marks, rows, parts during build query pipeline. - plan.buildQueryPipeline( + // Collect the selected marks, rows, parts during build query pipeline. + // Hold on to the returned QueryPipelineBuilderPtr because `plan` may have pointers into + // it (through QueryPlanResourceHolder). + auto builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - if (settings.optimize) - plan.optimize(QueryPlanOptimizationSettings::fromContext(context)); plan.explainEstimate(res_columns); insert_buf = false; break; diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 9cd1f2a251c..27c94119750 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ #include #include #include +#include #include #include #include @@ -314,6 +316,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 3330159aff5..590b7fe37b8 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include @@ -296,7 +295,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill mutation. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -360,7 +359,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill move partition. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h index 25c707d9962..79a6c0ddef4 100644 --- a/src/Interpreters/InterpreterRenameQuery.h +++ b/src/Interpreters/InterpreterRenameQuery.h @@ -10,7 +10,7 @@ namespace DB class AccessRightsElements; class DDLGuard; -/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMES. +/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMEs. struct UniqueTableName { String database_name; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 20fca1b1e76..0241f5f105f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -299,7 +299,7 @@ void checkAccessRightsForSelect( } throw Exception( ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. To execute this query, it's necessary to have the grant SELECT for at least one column on {}", context->getUserName(), table_id.getFullTableName()); } @@ -1193,12 +1193,12 @@ static InterpolateDescriptionPtr getInterpolateDescription( } col_set.clear(); - for (const auto & column : source_block) + for (const auto & column : result_block) { source_columns.emplace_back(column.name, column.type); col_set.insert(column.name); } - for (const auto & column : result_block) + for (const auto & column : source_block) if (!col_set.contains(column.name)) source_columns.emplace_back(column.name, column.type); } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 8db1d27c073..b8cace5e0ad 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -184,7 +184,7 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , query_tree(query_tree_) - , planner(query_tree_, select_query_options_) + , planner(query_tree_, select_query_options) { } diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index e9118b747e5..2c0baa0d4b3 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -15,7 +15,7 @@ namespace DB BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as(); - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes); session_context->addQueryParameters(ast.query_parameters); @@ -28,7 +28,7 @@ void InterpreterSetQuery::executeForCurrentContext(bool ignore_setting_constrain { const auto & ast = query_ptr->as(); if (!ignore_setting_constraints) - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); getContext()->applySettingsChanges(ast.changes); getContext()->resetSettingsToDefaultValue(ast.default_settings); } diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 2f1a4a32bee..5fe0a862e05 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -116,10 +116,10 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return rewritten_query.str(); } - if (query.temporary && !query.from.empty()) + if (query.temporary && !query.getFrom().empty()) throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); - String database = getContext()->resolveDatabase(query.from); + String database = getContext()->resolveDatabase(query.getFrom()); DatabaseCatalog::instance().assertDatabaseExists(database); WriteBufferFromOwnString rewritten_query; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 3207da9941a..dd0ee6b4444 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -89,53 +89,48 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType ReplicationQueue; - extern StorageActionBlockType DistributedSend; - extern StorageActionBlockType PartsTTLMerge; - extern StorageActionBlockType PartsMove; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType ReplicationQueue; + extern const StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsTTLMerge; + extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } namespace { -ExecutionStatus getOverallExecutionStatusOfCommands() +/// Sequentially tries to execute all commands and throws exception with info about failed commands +void executeCommandsAndThrowIfError(std::vector> commands) { - return ExecutionStatus(0); -} + ExecutionStatus result(0); + for (auto & command : commands) + { + try + { + command(); + } + catch (...) + { + ExecutionStatus current_result = ExecutionStatus::fromCurrentException(); -/// Consequently tries to execute all commands and generates final exception message for failed commands -template -ExecutionStatus getOverallExecutionStatusOfCommands(Callable && command, Callables && ... commands) -{ - ExecutionStatus status_head(0); - try - { - command(); - } - catch (...) - { - status_head = ExecutionStatus::fromCurrentException(); + if (result.code == 0) + result.code = current_result.code; + + if (!current_result.message.empty()) + { + if (!result.message.empty()) + result.message += '\n'; + result.message += current_result.message; + } + } } - ExecutionStatus status_tail = getOverallExecutionStatusOfCommands(std::forward(commands)...); - - auto res_status = status_head.code != 0 ? status_head.code : status_tail.code; - auto res_message = status_head.message + (status_tail.message.empty() ? "" : ("\n" + status_tail.message)); - - return ExecutionStatus(res_status, res_message); -} - -/// Consequently tries to execute all commands and throws exception with info about failed commands -template -void executeCommandsAndThrowIfError(Callables && ... commands) -{ - auto status = getOverallExecutionStatusOfCommands(std::forward(commands)...); - if (status.code != 0) - throw Exception::createDeprecated(status.message, status.code); + if (result.code != 0) + throw Exception::createDeprecated(result.message, result.code); } @@ -155,6 +150,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) return AccessType::SYSTEM_TTL_MERGES; else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; + else if (action_type == ActionLocks::PullReplicationLog) + return AccessType::SYSTEM_PULLING_REPLICATION_LOG; else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } @@ -322,33 +319,33 @@ BlockIO InterpreterSystemQuery::execute() } case Type::DROP_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropMarkCache(); + system_context->clearMarkCache(); break; case Type::DROP_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropUncompressedCache(); + system_context->clearUncompressedCache(); break; case Type::DROP_INDEX_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropIndexMarkCache(); + system_context->clearIndexMarkCache(); break; case Type::DROP_INDEX_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropIndexUncompressedCache(); + system_context->clearIndexUncompressedCache(); break; case Type::DROP_MMAP_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE); - system_context->dropMMappedFileCache(); + system_context->clearMMappedFileCache(); break; case Type::DROP_QUERY_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - getContext()->dropQueryCache(); + getContext()->clearQueryCache(); break; #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) - cache->reset(); + cache->clear(); break; #endif #if USE_AWS_S3 @@ -371,7 +368,18 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - cache->removeAllReleasable(); + if (query.key_to_drop.empty()) + { + cache->removeAllReleasable(); + } + else + { + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); + else + cache->removeKey(key); + } } break; } @@ -411,10 +419,10 @@ BlockIO InterpreterSystemQuery::execute() case Type::RELOAD_DICTIONARIES: { getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); - executeCommandsAndThrowIfError( + executeCommandsAndThrowIfError({ [&] { system_context->getExternalDictionariesLoader().reloadAllTriedToLoad(); }, [&] { system_context->getEmbeddedDictionaries().reload(); } - ); + }); ExternalDictionariesLoader::resetAll(); break; } @@ -502,6 +510,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_DISTRIBUTED_SENDS: startStopAction(ActionLocks::DistributedSend, true); break; + case Type::STOP_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, false); + break; + case Type::START_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, true); + break; case Type::DROP_REPLICA: dropReplica(query); break; @@ -537,23 +551,14 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - executeCommandsAndThrowIfError( - [&] { if (auto query_log = getContext()->getQueryLog()) query_log->flush(true); }, - [&] { if (auto part_log = getContext()->getPartLog("")) part_log->flush(true); }, - [&] { if (auto query_thread_log = getContext()->getQueryThreadLog()) query_thread_log->flush(true); }, - [&] { if (auto trace_log = getContext()->getTraceLog()) trace_log->flush(true); }, - [&] { if (auto text_log = getContext()->getTextLog()) text_log->flush(true); }, - [&] { if (auto metric_log = getContext()->getMetricLog()) metric_log->flush(true); }, - [&] { if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); }, - [&] { if (auto opentelemetry_span_log = getContext()->getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); }, - [&] { if (auto query_views_log = getContext()->getQueryViewsLog()) query_views_log->flush(true); }, - [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); }, - [&] { if (auto session_log = getContext()->getSessionLog()) session_log->flush(true); }, - [&] { if (auto transactions_info_log = getContext()->getTransactionsInfoLog()) transactions_info_log->flush(true); }, - [&] { if (auto processors_profile_log = getContext()->getProcessorsProfileLog()) processors_profile_log->flush(true); }, - [&] { if (auto cache_log = getContext()->getFilesystemCacheLog()) cache_log->flush(true); }, - [&] { if (auto asynchronous_insert_log = getContext()->getAsynchronousInsertLog()) asynchronous_insert_log->flush(true); } - ); + + auto logs = getContext()->getSystemLogs(); + std::vector> commands; + commands.reserve(logs.size()); + for (auto * system_log : logs) + commands.emplace_back([system_log] { system_log->flush(true); }); + + executeCommandsAndThrowIfError(commands); break; } case Type::STOP_LISTEN: @@ -1079,6 +1084,15 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_MOVES, query.getDatabase(), query.getTable()); break; } + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: + { + if (!query.table) + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG); + else + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable()); + break; + } case Type::STOP_FETCHES: case Type::START_FETCHES: { diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index d8a5ae57470..b71f3a9cc1c 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -10,7 +10,7 @@ namespace DB BlockIO InterpreterUseQuery::execute() { - const String & new_database = query_ptr->as().database; + const String & new_database = query_ptr->as().getDatabase(); getContext()->checkAccess(AccessType::SHOW_DATABASES, new_database); getContext()->getSessionContext()->setCurrentDatabase(new_database); return {}; diff --git a/src/Interpreters/JIT/CompileDAG.cpp b/src/Interpreters/JIT/CompileDAG.cpp index 6da17fb4c67..9c56341eae0 100644 --- a/src/Interpreters/JIT/CompileDAG.cpp +++ b/src/Interpreters/JIT/CompileDAG.cpp @@ -160,9 +160,7 @@ UInt128 CompileDAG::hash() const } } - UInt128 result; - hash.get128(result); - return result; + return hash.get128(); } } diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h index 21f7c67226c..8354b33bae9 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.h +++ b/src/Interpreters/JIT/CompiledExpressionCache.h @@ -19,7 +19,7 @@ public: size_t getCompiledExpressionSize() const { return compiled_expression_size; } - virtual ~CompiledExpressionCacheEntry() {} + virtual ~CompiledExpressionCacheEntry() = default; private: diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index fb5066b2d04..1d2ebc6b456 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -18,6 +18,7 @@ class JoinSwitcher : public IJoin public: JoinSwitcher(std::shared_ptr table_join_, const Block & right_sample_block_); + std::string getName() const override { return "JoinSwitcher"; } const TableJoin & getTableJoin() const override { return *table_join; } /// Add block of data from right hand of JOIN into current join object. diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 67ee2a64264..33c9dfa76ca 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -303,6 +303,11 @@ ColumnPtr emptyNotNullableClone(const ColumnPtr & column) return column->cloneEmpty(); } +ColumnPtr materializeColumn(const ColumnPtr & column) +{ + return recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst())); +} + ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) { ColumnRawPtrs ptrs; @@ -311,7 +316,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) for (const auto & column_name : names) { auto & column = block.getByName(column_name).column; - column = recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst())); + column = materializeColumn(column); ptrs.push_back(column.get()); } @@ -326,12 +331,7 @@ ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & nam for (const auto & column_name : names) { ColumnPtr column = block.getByName(column_name).column; - - column = column->convertToFullColumnIfConst(); - column = recursiveRemoveLowCardinality(column); - column = recursiveRemoveSparse(column); - - ptrs[column_name] = column; + ptrs[column_name] = materializeColumn(column); } return ptrs; @@ -340,8 +340,7 @@ ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & nam ColumnPtr materializeColumn(const Block & block, const String & column_name) { const auto & src_column = block.getByName(column_name).column; - return recursiveRemoveLowCardinality( - recursiveRemoveSparse(src_column->convertToFullColumnIfConst())); + return materializeColumn(src_column); } Columns materializeColumns(const Block & block, const Names & names) @@ -539,7 +538,7 @@ JoinMask getColumnAsMask(const Block & block, const String & column_name) return JoinMask(const_cond->getBool(0), block.rows()); } - ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); + ColumnPtr join_condition_col = materializeColumn(src_col.column); if (const auto * nullable_col = typeid_cast(join_condition_col.get())) { if (isNothing(assert_cast(*col_type).getNestedType())) diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index ceef1371f16..6f0c8f1bff4 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1034,7 +1034,7 @@ std::shared_ptr MergeJoin::loadRightBlock(size_t pos) const { auto load_func = [&]() -> std::shared_ptr { - TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getPath(), materializeBlock(right_sample_block)); + TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getAbsolutePath(), materializeBlock(right_sample_block)); return std::make_shared(input.block_in->read()); }; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 03a661c5b8a..98fae1d419f 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -22,6 +22,7 @@ class MergeJoin : public IJoin public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); + std::string getName() const override { return "PartialMergeJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 24e9e4487ae..91ba0f64e9b 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -16,7 +16,6 @@ NamesAndTypesList MetricLogElement::getNamesAndTypes() columns_with_type_and_name.emplace_back("event_date", std::make_shared()); columns_with_type_and_name.emplace_back("event_time", std::make_shared()); columns_with_type_and_name.emplace_back("event_time_microseconds", std::make_shared(6)); - columns_with_type_and_name.emplace_back("milliseconds", std::make_shared()); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) { @@ -45,7 +44,6 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[column_idx++]->insert(event_time); columns[column_idx++]->insert(event_time_microseconds); - columns[column_idx++]->insert(milliseconds); for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) columns[column_idx++]->insert(profile_events[i]); @@ -96,7 +94,6 @@ void MetricLog::metricThreadFunction() MetricLogElement elem; elem.event_time = std::chrono::system_clock::to_time_t(current_time); elem.event_time_microseconds = timeInMicroseconds(current_time); - elem.milliseconds = timeInMilliseconds(current_time) - timeInSeconds(current_time) * 1000; elem.profile_events.resize(ProfileEvents::end()); for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index aacdd4f49d2..a57f1cebf71 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -22,7 +22,6 @@ struct MetricLogElement { time_t event_time{}; Decimal64 event_time_microseconds{}; - UInt64 milliseconds{}; std::vector profile_events; std::vector current_metrics; diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index c95f5209760..c1838fa105c 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -39,8 +39,8 @@ public: bool canOptimize(const ASTFunction & ast_function) const { /// if GROUP BY contains the same function ORDER BY shouldn't be optimized - auto hash = ast_function.getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = ast_function.getTreeHash(); + const auto key = toString(hash); if (group_by_function_hashes.count(key)) return false; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 25c52ad8925..e50f8488cac 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -113,13 +113,14 @@ QueryTreeNodePtr prepareQueryAffectedQueryTree(const std::vector & has_index_or_projection) + const StorageInMemoryMetadata::HasDependencyCallback & has_dependency) { NameSet new_updated_columns = updated_columns; ColumnDependencies dependencies; + while (!new_updated_columns.empty()) { - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); new_updated_columns.clear(); for (const auto & dependency : new_dependencies) { @@ -292,9 +293,14 @@ bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const return data && data->getSettings()->materialize_ttl_recalculate_only; } -bool MutationsInterpreter::Source::hasIndexOrProjection(const String & file_name) const +bool MutationsInterpreter::Source::hasSecondaryIndex(const String & name) const { - return part && part->checksums.has(file_name); + return part && part->hasSecondaryIndex(name); +} + +bool MutationsInterpreter::Source::hasProjection(const String & name) const +{ + return part && part->hasProjection(name); } static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) @@ -533,13 +539,24 @@ void MutationsInterpreter::prepare(bool dry_run) validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } - std::function has_index_or_projection - = [&](const String & file_name) { return source.hasIndexOrProjection(file_name); }; + StorageInMemoryMetadata::HasDependencyCallback has_dependency = + [&](const String & name, ColumnDependency::Kind kind) + { + if (kind == ColumnDependency::PROJECTION) + return source.hasProjection(name); + + if (kind == ColumnDependency::SKIP_INDEX) + return source.hasSecondaryIndex(name); + + return true; + }; if (settings.recalculate_dependencies_of_updated_columns) - dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_index_or_projection); + dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_dependency); + bool has_alter_delete = false; std::vector read_columns; + /// First, break a sequence of commands into stages. for (auto & command : commands) { @@ -558,6 +575,7 @@ void MutationsInterpreter::prepare(bool dry_run) predicate = makeASTFunction("isZeroOrNull", predicate); stages.back().filters.push_back(predicate); + has_alter_delete = true; } else if (command.type == MutationCommand::UPDATE) { @@ -692,8 +710,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (it == std::cend(indices_desc)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown index: {}", command.index_name); - if (!source.hasIndexOrProjection("skp_idx_" + it->name + ".idx") - && !source.hasIndexOrProjection("skp_idx_" + it->name + ".idx2")) + if (!source.hasSecondaryIndex(it->name)) { auto query = (*it).expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(query, all_columns); @@ -707,7 +724,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -731,8 +748,9 @@ void MutationsInterpreter::prepare(bool dry_run) { // just recalculate ttl_infos without remove expired data auto all_columns_vec = all_columns.getNames(); - auto new_dependencies = metadata_snapshot->getColumnDependencies( - NameSet(all_columns_vec.begin(), all_columns_vec.end()), false, has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto new_dependencies = metadata_snapshot->getColumnDependencies(all_columns_set, false, has_dependency); + for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::TTL_EXPRESSION) @@ -757,8 +775,8 @@ void MutationsInterpreter::prepare(bool dry_run) } auto all_columns_vec = all_columns.getNames(); - auto all_dependencies = getAllColumnDependencies( - metadata_snapshot, NameSet(all_columns_vec.begin(), all_columns_vec.end()), has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto all_dependencies = getAllColumnDependencies(metadata_snapshot, all_columns_set, has_dependency); for (const auto & dependency : all_dependencies) { @@ -767,7 +785,7 @@ void MutationsInterpreter::prepare(bool dry_run) } /// Recalc only skip indices and projections of columns which could be updated by TTL. - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION) @@ -861,30 +879,44 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & index : metadata_snapshot->getSecondaryIndices()) { - if (source.hasIndexOrProjection("skp_idx_" + index.name + ".idx") || source.hasIndexOrProjection("skp_idx_" + index.name + ".idx2")) + if (!source.hasSecondaryIndex(index.name)) + continue; + + if (has_alter_delete) { - const auto & index_cols = index.expression->getRequiredColumns(); - bool changed = std::any_of( - index_cols.begin(), - index_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_indices.insert(index.name); + materialized_indices.insert(index.name); + continue; } + + const auto & index_cols = index.expression->getRequiredColumns(); + bool changed = std::any_of( + index_cols.begin(), + index_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_indices.insert(index.name); } for (const auto & projection : metadata_snapshot->getProjections()) { - if (source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) + continue; + + if (has_alter_delete) { - const auto & projection_cols = projection.required_columns; - bool changed = std::any_of( - projection_cols.begin(), - projection_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_projections.insert(projection.name); + materialized_projections.insert(projection.name); + continue; } + + const auto & projection_cols = projection.required_columns; + bool changed = std::any_of( + projection_cols.begin(), + projection_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_projections.insert(projection.name); } /// Stages might be empty when we materialize skip indices or projections which don't add any diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index d783b503531..9b4caaae231 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -120,7 +120,8 @@ public: bool supportsLightweightDelete() const; bool hasLightweightDeleteMask() const; bool materializeTTLRecalculateOnly() const; - bool hasIndexOrProjection(const String & file_name) const; + bool hasSecondaryIndex(const String & name) const; + bool hasProjection(const String & name) const; void read( Stage & first_stage, diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 46c203e15bc..119759265ef 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -63,6 +63,71 @@ public: using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; using FirstNonDeterministicFunctionData = FirstNonDeterministicFunctionMatcher::Data; +/// Executes and replaces with literals +/// non-deterministic functions in query. +/// Similar to ExecuteScalarSubqueriesVisitor. +class ExecuteNonDeterministicConstFunctionsMatcher +{ +public: + + struct Data + { + ContextPtr context; + std::optional max_literal_size; + }; + + static bool needChildVisit(const ASTPtr & ast, const ASTPtr & /*child*/) + { + /// Do not visit subqueries because they are executed separately. + return !ast->as(); + } + + static void visit(ASTPtr & ast, const Data & data) + { + if (auto * function = ast->as()) + visit(*function, ast, data); + } + + static void visit(ASTFunction & function, ASTPtr & ast, const Data & data) + { + if (!FunctionFactory::instance().has(function.name)) + return; + + /// It makes sense to execute functions which are deterministic + /// in scope of query because they are usually constant expressions. + auto builder = FunctionFactory::instance().get(function.name, data.context); + if (builder->isDeterministic() || !builder->isDeterministicInScopeOfQuery()) + return; + + Field field; + DataTypePtr type; + + try + { + std::tie(field, type) = evaluateConstantExpression(ast, data.context); + } + catch (...) + { + /// An exception can be thrown if the expression is not constant. + /// It's ok in that context and we just do nothing in that case. + /// It's bad pattern but it's quite hard to implement it in another way. + return; + } + + auto column = type->createColumn(); + column->insert(field); + + Block scalar{{std::move(column), type, "_constant"}}; + if (worthConvertingScalarToLiteral(scalar, data.max_literal_size)) + { + auto literal = std::make_unique(std::move(field)); + ast = addTypeConversionToAST(std::move(literal), type->getName()); + } + } +}; + +using ExecuteNonDeterministicConstFunctionsVisitor = InDepthNodeVisitor; + } FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context) @@ -97,4 +162,51 @@ FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const Muta return {}; } +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context) +{ + const auto & settings = context->getSettingsRef(); + if (!settings.mutations_execute_subqueries_on_initiator + && !settings.mutations_execute_nondeterministic_on_initiator) + return nullptr; + + auto query = alter_command.clone(); + auto & new_alter_command = *query->as(); + + if (settings.mutations_execute_subqueries_on_initiator) + { + Scalars scalars; + Scalars local_scalars; + + ExecuteScalarSubqueriesVisitor::Data data{ + WithContext{context}, + /*subquery_depth=*/ 0, + scalars, + local_scalars, + /*only_analyze=*/ false, + /*is_create_parameterized_view=*/ false, + /*replace_only_to_literals=*/ true, + settings.mutations_max_literal_size_to_replace}; + + ExecuteScalarSubqueriesVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + if (settings.mutations_execute_nondeterministic_on_initiator) + { + ExecuteNonDeterministicConstFunctionsVisitor::Data data{ + context, settings.mutations_max_literal_size_to_replace}; + + ExecuteNonDeterministicConstFunctionsVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + return query; +} + } diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.h b/src/Interpreters/MutationsNonDeterministicHelpers.h index eb3a3b1c7a4..5d9aa9752f1 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.h +++ b/src/Interpreters/MutationsNonDeterministicHelpers.h @@ -7,6 +7,7 @@ namespace DB { struct MutationCommand; +class ASTAlterCommand; struct FirstNonDeterministicFunctionResult { @@ -18,4 +19,9 @@ struct FirstNonDeterministicFunctionResult /// may also be non-deterministic in expressions of mutation command. FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context); +/// Executes non-deterministic functions and subqueries in expressions of mutation +/// command and replaces them to the literals with a result of expressions. +/// Returns rewritten query if expressions were replaced, nullptr otherwise. +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context); + } diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index 6a9251cec49..dd205ae6508 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -118,35 +118,37 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction {"greaterOrEquals", "lessOrEquals"}, }; - if (!swap_relations.contains(function.name)) return; + if (!swap_relations.contains(function.name)) + return; - if (!function.arguments || function.arguments->children.size() != 2) return; + if (!function.arguments || function.arguments->children.size() != 2) + return; size_t func_id = function.arguments->children.size(); for (size_t i = 0; i < function.arguments->children.size(); i++) - { if (const auto * func = function.arguments->children[i]->as()) - { func_id = i; - } - } - if (func_id == function.arguments->children.size()) return; + if (func_id == function.arguments->children.size()) + return; size_t literal_id = 1 - func_id; const auto * literal = function.arguments->children[literal_id]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64) return; + if (!literal || literal->value.getType() != Field::Types::UInt64) + return; String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); const auto * ast_func = function.arguments->children[func_id]->as(); /// Currently we only handle single-argument functions. - if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; + if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) + return; const auto * column_id = ast_func->arguments->children.at(0)->as(); - if (!column_id) return; + if (!column_id) + return; auto pos = IdentifierSemantic::getMembership(*column_id); if (!pos) @@ -158,21 +160,30 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction return; auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); - if (!data_type_and_name) return; + if (!data_type_and_name) + return; + + const auto column_type = data_type_and_name->type; + if (!column_type || (!isDateOrDate32(*column_type) && !isDateTime(*column_type) && !isDateTime64(*column_type))) + return; const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); - if (!converter) return; + if (!converter) + return; ColumnsWithTypeAndName args; - args.emplace_back(data_type_and_name->type, "tmp"); + args.emplace_back(column_type, "tmp"); auto converter_base = converter->build(args); - if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; + if (!converter_base || !converter_base->hasInformationAboutPreimage()) + return; - auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); - if (!preimage_range) return; + auto preimage_range = converter_base->getPreimage(*column_type, literal->value); + if (!preimage_range) + return; const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); - if (!new_ast) return; + if (!new_ast) + return; ast = new_ast; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index a97f1f405bc..d0344468153 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -107,6 +107,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table_uuid", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"partition", std::make_shared()}, {"part_type", std::make_shared()}, {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, @@ -157,6 +158,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_uuid); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(partition); columns[i++]->insert(part_type.toString()); columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); @@ -229,6 +231,10 @@ bool PartLog::addNewParts( elem.table_name = table_id.table_name; elem.table_uuid = table_id.uuid; elem.partition_id = part->info.partition_id; + { + WriteBufferFromString out(elem.partition); + part->partition.serializeText(part->storage, out, {}); + } elem.part_name = part->name; elem.disk_name = part->getDataPartStorage().getDiskName(); elem.path_on_disk = part->getDataPartStorage().getFullPath(); diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 843792d03a9..462314f2768 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -63,6 +63,7 @@ struct PartLogElement UUID table_uuid{UUIDHelpers::Nil}; String part_name; String partition_id; + String partition; String disk_name; String path_on_disk; diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 67822ecf440..e0551dff2ad 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -198,7 +198,11 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) CompletedPipelineExecutor executor(pipeline); executor.execute(); - set_and_key->set->checkIsCreated(); + /// SET may not be created successfully at this step because of the sub-query timeout, but if we have + /// timeout_overflow_mode set to `break`, no exception is thrown, and the executor just stops executing + /// the pipeline without setting `set_and_key->set->is_created` to true. + if (!set_and_key->set->isCreated()) + return nullptr; return set_and_key->set; } @@ -207,7 +211,7 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes & types) { WriteBufferFromOwnString buf; - buf << "__set_" << key.first << "_" << key.second; + buf << "__set_" << DB::toString(key); if (!types.empty()) { buf << "("; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index cb240f5260a..b953b8470e1 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -127,10 +127,10 @@ class PreparedSets { public: - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; struct Hashing { - UInt64 operator()(const Hash & key) const { return key.first ^ key.second; } + UInt64 operator()(const Hash & key) const { return key.low64 ^ key.high64; } }; using SetsFromTuple = std::unordered_map>, Hashing>; diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index e4a052388ac..46dcc6129bc 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -66,10 +66,11 @@ void ReplaceQueryParameterVisitor::visitChildren(ASTPtr & ast) const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { auto search = query_parameters.find(name); - if (search != query_parameters.end()) - return search->second; - else + if (search == query_parameters.end()) throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Substitution {} is not set", backQuote(name)); + + ++num_replaced_parameters; + return search->second; } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) @@ -131,6 +132,7 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) if (ast_identifier->children.empty()) return; + bool replaced_parameter = false; auto & name_parts = ast_identifier->name_parts; for (size_t i = 0, j = 0, size = name_parts.size(); i < size; ++i) { @@ -138,9 +140,14 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) { const auto & ast_param = ast_identifier->children[j++]->as(); name_parts[i] = getParamValue(ast_param.name); + replaced_parameter = true; } } + /// Do not touch AST if there are no parameters + if (!replaced_parameter) + return; + /// FIXME: what should this mean? if (!ast_identifier->semantic->special && name_parts.size() >= 2) ast_identifier->semantic->table = ast_identifier->name_parts.end()[-2]; diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index dd785cd768e..7d5da7ea85b 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -20,8 +20,12 @@ public: void visit(ASTPtr & ast); + size_t getNumberOfReplacedParameters() const { return num_replaced_parameters; } + private: const NameToNameMap & query_parameters; + size_t num_replaced_parameters = 0; + const String & getParamValue(const String & name); void visitIdentifier(ASTPtr & ast); void visitQueryParameter(ASTPtr & ast); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 68411e80755..3e6a9309d38 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INVALID_SETTING_VALUE; +} + namespace { @@ -52,7 +58,11 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) , WithContext(global_context_) , heavy_metric_update_period(heavy_metrics_update_period_seconds) -{} +{ + /// sanity check + if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero"); +} void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) { @@ -242,15 +252,26 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values size_t total_number_of_rows = 0; size_t total_number_of_parts = 0; + size_t total_number_of_tables_system = 0; + + size_t total_number_of_bytes_system = 0; + size_t total_number_of_rows_system = 0; + size_t total_number_of_parts_system = 0; + for (const auto & db : databases) { /// Check if database can contain MergeTree tables if (!db.second->canContainMergeTreeTables()) continue; + bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE; + for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) { ++total_number_of_tables; + if (is_system) + ++total_number_of_tables_system; + const auto & table = iterator->table(); if (!table) continue; @@ -260,9 +281,21 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values const auto & settings = getContext()->getSettingsRef(); calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first); - total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); - total_number_of_rows += table_merge_tree->totalRows(settings).value(); - total_number_of_parts += table_merge_tree->getActivePartsCount(); + + size_t bytes = table_merge_tree->totalBytes(settings).value(); + size_t rows = table_merge_tree->totalRows(settings).value(); + size_t parts = table_merge_tree->getActivePartsCount(); + + total_number_of_bytes += bytes; + total_number_of_rows += rows; + total_number_of_parts += parts; + + if (is_system) + { + total_number_of_bytes_system += bytes; + total_number_of_rows_system += rows; + total_number_of_parts_system += parts; + } } if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) @@ -316,6 +349,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." }; new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family." " Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." }; + + new_values["NumberOfTablesSystem"] = { total_number_of_tables_system, "Total number of tables in the system database on the server stored in tables of MergeTree family."}; + + new_values["TotalBytesOfMergeTreeTablesSystem"] = { total_number_of_bytes_system, "Total amount of bytes (compressed, including data and indices) stored in tables of MergeTree family in the system database." }; + new_values["TotalRowsOfMergeTreeTablesSystem"] = { total_number_of_rows_system, "Total amount of rows (records) stored in tables of MergeTree family in the system database." }; + new_values["TotalPartsOfMergeTreeTablesSystem"] = { total_number_of_parts_system, "Total amount of data parts in tables of MergeTree family in the system database." }; } #if USE_NURAFT diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index cadf619700c..e0b5db44593 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -3,11 +3,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -200,7 +202,6 @@ private: LOG_TEST(log, "Schedule closing session with session_id: {}, user_id: {}", session.key.second, session.key.first); - } void cleanThread() @@ -298,6 +299,7 @@ Session::~Session() if (notified_session_log_about_login) { + LOG_DEBUG(log, "{} Logout, user_id: {}", toString(auth_id), toString(*user_id)); if (auto session_log = getSessionLog()) { /// TODO: We have to ensure that the same info is added to the session log on a LoginSuccess event and on the corresponding Logout event. @@ -319,6 +321,7 @@ AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & u } catch (const Exception & e) { + LOG_ERROR(log, "{} Authentication failed with error: {}", toString(auth_id), e.what()); if (auto session_log = getSessionLog()) session_log->addLoginFailure(auth_id, getClientInfo(), user_name, e); @@ -336,6 +339,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if (session_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before authentication finish"); + auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; @@ -490,6 +496,8 @@ ContextMutablePtr Session::makeSessionContext() throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -503,13 +511,19 @@ ContextMutablePtr Session::makeSessionContext() prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. - if (user_id) - new_session_context->setUser(*user_id); + new_session_context->setUser(*user_id); /// Session context is ready. session_context = new_session_context; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + {}, + session_context->getSettingsRef().max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -521,6 +535,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -541,9 +557,23 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); + auto access = new_session_context->getAccess(); + UInt64 max_sessions_for_user = 0; /// Set user information for the new context: current profiles, roles, access rights. - if (user_id && !new_session_context->getAccess()->tryGetUser()) + if (!access->tryGetUser()) + { new_session_context->setUser(*user_id); + max_sessions_for_user = new_session_context->getSettingsRef().max_sessions_for_user; + } + else + { + // Always get setting from profile + // profile can be changed by ALTER PROFILE during single session + auto settings = access->getDefaultSettings(); + const Field * max_session_for_user_field = settings.tryGet("max_sessions_for_user"); + if (max_session_for_user_field) + max_sessions_for_user = max_session_for_user_field->safeGet(); + } /// Session context is ready. session_context = std::move(new_session_context); @@ -551,6 +581,13 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: named_session_created = new_named_session_created; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + { session_name_ }, + max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -585,7 +622,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (auto query_context_user = query_context->getAccess()->tryGetUser()) { - LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", + LOG_TRACE(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", toString(auth_id), from_session_context ? "session" : "global", toString(*user_id), @@ -624,24 +661,38 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (user_id) user = query_context->getUser(); - if (!notified_session_log_about_login) - { - if (auto session_log = getSessionLog()) - { - session_log->addLoginSuccess( - auth_id, - named_session ? std::optional(named_session->key.second) : std::nullopt, - *query_context, - user); - - notified_session_log_about_login = true; - } - } + /// Interserver does not create session context + recordLoginSucess(query_context); return query_context; } +void Session::recordLoginSucess(ContextPtr login_context) const +{ + if (notified_session_log_about_login) + return; + + if (!login_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session or query context must be created"); + + if (auto session_log = getSessionLog()) + { + const auto & settings = login_context->getSettingsRef(); + const auto access = login_context->getAccess(); + + session_log->addLoginSuccess(auth_id, + named_session ? named_session->key.second : "", + settings, + access, + getClientInfo(), + user); + } + + notified_session_log_about_login = true; +} + + void Session::releaseSessionID() { if (!named_session) diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 36f811ccd24..81ef987b428 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,8 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; + void recordLoginSucess(ContextPtr login_context) const; + mutable bool notified_session_log_about_login = false; const UUID auth_id; @@ -113,6 +116,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + SessionTracker::SessionTrackerHandle session_tracker_handle; + Poco::Logger * log = nullptr; }; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 0a8a7fc18c5..b847eaf9824 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -199,12 +199,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user) +void SessionLog::addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user) { - const auto access = login_context.getAccess(); - const auto & settings = login_context.getSettingsRef(); - const auto & client_info = login_context.getClientInfo(); - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; @@ -215,8 +216,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses } log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; - if (session_id) - log_entry.session_id = *session_id; + log_entry.session_id = session_id; if (const auto roles_info = access->getRolesInfo()) log_entry.roles = roles_info->getCurrentRolesNames(); diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 1282ac09c4d..8757bc12270 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -20,6 +20,7 @@ enum SessionLogElementType : int8_t class ContextAccess; struct User; using UserPtr = std::shared_ptr; +using ContextAccessPtr = std::shared_ptr; /** A struct which will be inserted as row into session_log table. * @@ -72,7 +73,13 @@ class SessionLog : public SystemLog using SystemLog::SystemLog; public: - void addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user); + void addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user); + void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional & user, const Exception & reason); void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info); }; diff --git a/src/Interpreters/SessionTracker.cpp b/src/Interpreters/SessionTracker.cpp new file mode 100644 index 00000000000..4636766e288 --- /dev/null +++ b/src/Interpreters/SessionTracker.cpp @@ -0,0 +1,62 @@ +#include "SessionTracker.h" + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int USER_SESSION_LIMIT_EXCEEDED; +} + +SessionTracker::Session::Session(SessionTracker & tracker_, + const UUID& user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept + : tracker(tracker_), user_id(user_id_), session_info_iter(session_info_iter_) +{ +} + +SessionTracker::Session::~Session() +{ + tracker.stopTracking(user_id, session_info_iter); +} + +SessionTracker::SessionTrackerHandle +SessionTracker::trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + if (sessions_for_user_iter == sessions_for_user.end()) + sessions_for_user_iter = sessions_for_user.emplace(user_id, SessionInfos()).first; + + SessionInfos & session_infos = sessions_for_user_iter->second; + if (max_sessions_for_user && session_infos.size() >= max_sessions_for_user) + { + throw Exception(ErrorCodes::USER_SESSION_LIMIT_EXCEEDED, + "User {} has overflown session count {}", + toString(user_id), + max_sessions_for_user); + } + + session_infos.emplace_front(session_info); + + return std::make_unique(*this, user_id, session_infos.begin()); +} + +void SessionTracker::stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + chassert(sessions_for_user_iter != sessions_for_user.end()); + + sessions_for_user_iter->second.erase(session_info_iter); + if (sessions_for_user_iter->second.empty()) + sessions_for_user.erase(sessions_for_user_iter); +} + +} diff --git a/src/Interpreters/SessionTracker.h b/src/Interpreters/SessionTracker.h new file mode 100644 index 00000000000..0827213aeed --- /dev/null +++ b/src/Interpreters/SessionTracker.h @@ -0,0 +1,60 @@ +#pragma once + +#include "ClientInfo.h" + +#include +#include +#include +#include + +namespace DB +{ + +struct SessionInfo +{ + const String session_id; +}; + +using SessionInfos = std::list; + +using SessionsForUser = std::unordered_map; + +class SessionTracker; + +class SessionTracker +{ +public: + class Session : boost::noncopyable + { + public: + explicit Session(SessionTracker & tracker_, + const UUID & user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept; + + ~Session(); + + private: + friend class SessionTracker; + + SessionTracker & tracker; + const UUID user_id; + const SessionInfos::const_iterator session_info_iter; + }; + + using SessionTrackerHandle = std::unique_ptr; + + SessionTrackerHandle trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user); + +private: + /// disallow manual messing with session tracking + friend class Session; + + std::mutex mutex; + SessionsForUser sessions_for_user TSA_GUARDED_BY(mutex); + + void stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter); +}; + +} diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index e09a66a38e6..c9f48ee1be9 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -39,7 +39,7 @@ namespace TemporaryFileOnDiskHolder flushToFile(const DiskPtr & disk, const Block & header, QueryPipelineBuilder pipeline, const String & codec) { auto tmp_file = std::make_unique(disk, CurrentMetrics::TemporaryFilesForJoin); - auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getPath(), header, std::move(pipeline), codec); + auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getAbsolutePath(), header, std::move(pipeline), codec); ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, write_stat.compressed_bytes); ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, write_stat.uncompressed_bytes); @@ -267,7 +267,7 @@ SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function(file->getPath(), materializeBlock(sample_block))); + return Pipe(std::make_shared(file->getAbsolutePath(), materializeBlock(sample_block))); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index be0468aa876..23a5a96584c 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -48,6 +48,11 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge; +} + namespace { class StorageWithComment : public IAST @@ -124,6 +129,7 @@ std::shared_ptr createSystemLog( "Creating {}.{} from {}", default_database_name, default_table_name, config_prefix); SystemLogSettings log_settings; + log_settings.queue_settings.database = config.getString(config_prefix + ".database", default_database_name); log_settings.queue_settings.table = config.getString(config_prefix + ".table", default_table_name); @@ -560,6 +566,10 @@ void SystemLog::prepareTable() rename->elements.emplace_back(std::move(elem)); + ActionLock merges_lock; + if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil) + merges_lock = table->getActionLock(ActionLocks::PartsMerge); + auto query_context = Context::createCopy(context); /// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables query_context->setSetting("check_table_dependencies", Field{false}); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 437b1b2a6bb..cf287ad7775 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -111,7 +111,7 @@ public: * and new table get created - as if previous table was not exist. */ SystemLog(ContextPtr context_, - const SystemLogSettings& settings_, + const SystemLogSettings & settings_, std::shared_ptr> queue_ = nullptr); /** Append a record into log. @@ -131,8 +131,6 @@ protected: using Base::queue; private: - - /* Saving thread data */ const StorageID table_id; const String storage_def; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 5d14a57759f..af3b027b2db 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -189,7 +189,6 @@ private: template void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict); - NamesAndTypesList correctedColumnsAddedByJoin() const; void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); @@ -331,7 +330,7 @@ public: const ColumnsWithTypeAndName & right_sample_columns); void setAsofInequality(ASOFJoinInequality inequality) { asof_inequality = inequality; } - ASOFJoinInequality getAsofInequality() { return asof_inequality; } + ASOFJoinInequality getAsofInequality() const { return asof_inequality; } ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only @@ -371,6 +370,8 @@ public: bool isSpecialStorage() const { return !right_storage_name.empty() || right_storage_join || right_kv_storage; } std::shared_ptr getStorageKeyValue() { return right_kv_storage; } + + NamesAndTypesList correctedColumnsAddedByJoin() const; }; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 69fef21dbab..0aa2a0b9f4a 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -235,9 +235,9 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const : parent(parent_) , header(header_) , file(std::move(file_)) - , out_writer(std::make_unique(std::make_unique(file->getPath()), header)) + , out_writer(std::make_unique(std::make_unique(file->getAbsolutePath()), header)) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getPath()); + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getAbsolutePath()); } TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_) @@ -365,7 +365,7 @@ void TemporaryFileStream::release() String TemporaryFileStream::getPath() const { if (file) - return file->getPath(); + return file->getAbsolutePath(); if (segment_holder && !segment_holder->empty()) return segment_holder->front().getPathInLocalCache(); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 3951a41f0c5..36c33a5e7cc 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -36,7 +36,6 @@ NamesAndTypesList TextLogElement::getNamesAndTypes() {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, {"event_time_microseconds", std::make_shared(6)}, - {"microseconds", std::make_shared()}, {"thread_name", std::make_shared(std::make_shared())}, {"thread_id", std::make_shared()}, @@ -62,7 +61,6 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - columns[i++]->insert(microseconds); columns[i++]->insertData(thread_name.data(), thread_name.size()); columns[i++]->insert(thread_id); diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 4bfed5327f3..bfeca324fde 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -14,7 +14,6 @@ struct TextLogElement { time_t event_time{}; Decimal64 event_time_microseconds{}; - UInt32 microseconds{}; String thread_name; UInt64 thread_id{}; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 1b39bb6c0dc..6ee8ec987db 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -382,12 +382,10 @@ void ThreadStatus::finalizePerformanceCounters() updatePerformanceCounters(); // We want to close perf file descriptors if the perf events were enabled for - // one query. What this code does in practice is less clear -- e.g., if I run - // 'select 1 settings metrics_perf_events_enabled = 1', I still get - // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. + // one query. bool close_perf_descriptors = true; - if (auto query_context_ptr = query_context.lock()) - close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled; + if (auto global_context_ptr = global_context.lock()) + close_perf_descriptors = !global_context_ptr->getSettingsRef().metrics_perf_events_enabled; try { @@ -410,7 +408,7 @@ void ThreadStatus::finalizePerformanceCounters() if (settings.log_queries && settings.log_query_threads) { const auto now = std::chrono::system_clock::now(); - Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); + Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); if (query_duration_ms >= settings.log_queries_min_query_duration_ms.totalMilliseconds()) { if (auto thread_log = global_context_ptr->getQueryThreadLog()) diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 19cc5c4e6bd..30fbe26d038 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -112,6 +112,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -127,7 +130,8 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; + + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; trace_log->add(std::move(element)); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 631e7f5c746..66b23f09ba0 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -409,7 +409,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_before_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (before commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (before commit)"); } /// Commit point @@ -419,7 +419,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_after_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (after commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (after commit)"); } } catch (const Coordination::Exception & e) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 54ae939dbd1..9612a4d6c3f 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -409,8 +409,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : set->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } @@ -419,8 +419,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : group_by->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 246843d39c2..5b384a5f74b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -110,6 +110,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor>, true>; +char countdistinctif[] = "countdistinctif"; +using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor>, true>; + char in[] = "in"; using CustomizeInVisitor = InDepthNodeVisitor>, true>; @@ -1368,6 +1371,12 @@ void TreeRewriter::normalize( CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"}; CustomizeIfDistinctVisitor(data_distinct_if).visit(query); + if (settings.rewrite_count_distinct_if_with_count_distinct_implementation) + { + CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"}; + CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query); + } + ExistsExpressionVisitor::Data exists; ExistsExpressionVisitor(exists).visit(query); diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 7ed7788cf1d..8a7a5024d69 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -91,34 +91,30 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. - if (type == WindowFrame::FrameType::ROWS - || type == WindowFrame::FrameType::GROUPS) + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) { - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); + } - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); - } + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); } // Check relative positioning of offsets. diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 89c73117960..0e810748ab1 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE; } @@ -565,4 +567,39 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t return converted; } +template +static bool decimalEqualsFloat(Field field, Float64 float_value) +{ + auto decimal_field = field.get>(); + auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); + return decimal_to_float == float_value; +} + +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) +{ + Field result_value = convertFieldToType(from_value, to_type); + + if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) + { + bool is_equal = applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value); + return is_equal ? result_value : std::optional{}; + } + + if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType())) + { + /// Convert back to Float64 and compare + if (result_value.getType() == Field::Types::Decimal32) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal64) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal128) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal256) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); + } + + return result_value; +} + } diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 91b631d0e12..7f49ea5479d 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -20,4 +20,8 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co /// Does the same, but throws ARGUMENT_OUT_OF_BOUND if value does not fall into the range. Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint = nullptr); +/// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. +/// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); + } diff --git a/src/Interpreters/examples/hash_map_string_small.cpp b/src/Interpreters/examples/hash_map_string_small.cpp index b58cdfbacd0..5f0312b3bdd 100644 --- a/src/Interpreters/examples/hash_map_string_small.cpp +++ b/src/Interpreters/examples/hash_map_string_small.cpp @@ -64,8 +64,8 @@ inline bool operator==(SmallStringRef lhs, SmallStringRef rhs) if (lhs.size == 0) return true; -#ifdef __SSE2__ - return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size); +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + return memequalWide(lhs.data(), rhs.data(), lhs.size); #else return 0 == memcmp(lhs.data(), rhs.data(), lhs.size); #endif diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 04c1d545207..750affdfe71 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -49,7 +49,7 @@ static ZooKeeperRetriesInfo getRetriesInfo() ); } -bool isSupportedAlterType(int type) +bool isSupportedAlterTypeForOnClusterDDLQuery(int type) { assert(type != ASTAlterCommand::NO_TYPE); static const std::unordered_set unsupported_alter_types{ @@ -90,7 +90,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, { for (const auto & command : query_alter->command_list->children) { - if (!isSupportedAlterType(command->as().type)) + if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as().type)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -559,7 +559,7 @@ Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptrtryGetChildren(node_path, res); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw Coordination::Exception(code, node_path); + throw Coordination::Exception::fromPath(code, node_path); return res; } diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 40db13d7ef5..7daf9babf9f 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -21,7 +21,7 @@ class Cluster; using ClusterPtr = std::shared_ptr; /// Returns true if provided ALTER type can be executed ON CLUSTER -bool isSupportedAlterType(int type); +bool isSupportedAlterTypeForOnClusterDDLQuery(int type); struct DDLQueryOnClusterParams { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 578ca3b41f9..bac6807b682 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -728,11 +729,16 @@ static std::tuple executeQueryImpl( is_create_parameterized_view = create_query->isParameterizedView(); /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - if (!is_create_parameterized_view && context->hasQueryParameters()) + /// Even if we don't have parameters in query_context, check that AST doesn't have unknown parameters + bool probably_has_params = find_first_symbols<'{'>(begin, end) != end; + if (!is_create_parameterized_view && probably_has_params) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); - query = serializeAST(*ast); + if (visitor.getNumberOfReplacedParameters()) + query = serializeAST(*ast); + else + query.assign(begin, query_end); } else { @@ -1033,6 +1039,11 @@ static std::tuple executeQueryImpl( } + // InterpreterSelectQueryAnalyzer does not build QueryPlan in the constructor. + // We need to force to build it here to check if we need to ignore quota. + if (auto * interpreter_with_analyzer = dynamic_cast(interpreter.get())) + interpreter_with_analyzer->getQueryPlan(); + if (!interpreter->ignoreQuota() && !quota_checked) { quota = context->getQuota(); diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 5bbd2667f55..4cac2f0e20c 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -306,7 +306,9 @@ void fillMissingColumns( return; size_t level = ISerialization::getArrayLevel(subpath); - assert(level < num_dimensions); + /// It can happen if element of Array is Map. + if (level >= num_dimensions) + return; auto stream_name = ISerialization::getFileNameForStream(*requested_column, subpath); auto it = offsets_columns.find(stream_name); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 83af2684322..aeb45c08bea 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -45,10 +45,10 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType DistributedSend; } static void executeCreateQuery( @@ -250,6 +250,9 @@ static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & dat { String path = context->getPath() + "metadata/" + database_name; String metadata_file = path + ".sql"; + if (fs::exists(metadata_file + ".tmp")) + fs::remove(metadata_file + ".tmp"); + if (fs::exists(fs::path(metadata_file))) { /// 'has_force_restore_data_flag' is true, to not fail on loading query_log table, if it is corrupted. diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp new file mode 100644 index 00000000000..7dc452a0fcb --- /dev/null +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -0,0 +1,59 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +static bool isUserDefinedFunctionQuery(const ASTPtr & query) +{ + return query->as() + || query->as(); +} + +static bool isAccessControlQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as() + || query->as() + || query->as() + || query->as(); +} + +ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) +{ + auto * query_on_cluster = dynamic_cast(query.get()); + + if (!query_on_cluster || query_on_cluster->cluster.empty()) + return query; + + if ((isUserDefinedFunctionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_udf_queries + && context->getUserDefinedSQLObjectsLoader().isReplicated()) + || (isAccessControlQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + { + LOG_DEBUG(&Poco::Logger::get("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); + return query_on_cluster->getRewrittenASTWithoutOnCluster(params); + } + + return query; +} +} diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.h b/src/Interpreters/removeOnClusterClauseIfNeeded.h new file mode 100644 index 00000000000..0cbc196c9f2 --- /dev/null +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query_ptr, ContextPtr context, const WithoutOnClusterASTRewriteParams & params = {}); + +} diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index dab14a66ed7..aba6e0750fa 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -209,7 +209,7 @@ TEST_F(FileCacheTest, get) { std::cerr << "Step 1\n"; - auto cache = DB::FileCache(settings); + auto cache = DB::FileCache("1", settings); cache.initialize(); auto key = cache.createKeyForPath("key1"); @@ -568,7 +568,7 @@ TEST_F(FileCacheTest, get) { /// Test LRUCache::restore(). - auto cache2 = DB::FileCache(settings); + auto cache2 = DB::FileCache("2", settings); cache2.initialize(); auto key = cache2.createKeyForPath("key1"); @@ -587,7 +587,7 @@ TEST_F(FileCacheTest, get) settings2.max_file_segment_size = 10; settings2.base_path = caches_dir / "cache2"; fs::create_directories(settings2.base_path); - auto cache2 = DB::FileCache(settings2); + auto cache2 = DB::FileCache("3", settings2); cache2.initialize(); auto key = cache2.createKeyForPath("key1"); @@ -600,11 +600,10 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 13\n"; { - /// Test delated cleanup + /// Test delayed cleanup - auto cache = FileCache(settings); + auto cache = FileCache("4", settings); cache.initialize(); - cache.cleanup(); const auto key = cache.createKeyForPath("key10"); const auto key_path = cache.getPathInLocalCache(key); @@ -619,21 +618,15 @@ TEST_F(FileCacheTest, get) cache.removeAllReleasable(); ASSERT_EQ(cache.getUsedCacheSize(), 0); - ASSERT_TRUE(fs::exists(key_path)); - ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular))); - - cache.cleanup(); ASSERT_TRUE(!fs::exists(key_path)); - ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path())); + ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular))); } std::cerr << "Step 14\n"; { /// Test background thread delated cleanup - auto settings2{settings}; - settings2.delayed_cleanup_interval_ms = 0; - auto cache = DB::FileCache(settings2); + auto cache = DB::FileCache("5", settings); cache.initialize(); const auto key = cache.createKeyForPath("key10"); const auto key_path = cache.getPathInLocalCache(key); @@ -662,7 +655,7 @@ TEST_F(FileCacheTest, writeBuffer) settings.max_file_segment_size = 5; settings.base_path = cache_base_path; - FileCache cache(settings); + FileCache cache("6", settings); cache.initialize(); auto write_to_cache = [&cache](const String & key, const Strings & data, bool flush) @@ -767,7 +760,7 @@ TEST_F(FileCacheTest, temporaryData) settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; - DB::FileCache file_cache(settings); + DB::FileCache file_cache("7", settings); file_cache.initialize(); auto tmp_data_scope = std::make_shared(nullptr, &file_cache, 0); @@ -908,7 +901,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) wb->next(); wb->finalize(); - auto cache = std::make_shared(settings); + auto cache = std::make_shared("8", settings); cache->initialize(); auto key = cache->createKeyForPath(file_path); diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index cdf8402745f..958be928509 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -1,7 +1,6 @@ #include "OwnSplitChannel.h" #include "OwnFormattingChannel.h" -#include #include #include #include @@ -118,7 +117,6 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) elem.event_time = msg_ext.time_seconds; elem.event_time_microseconds = msg_ext.time_in_microseconds; - elem.microseconds = msg_ext.time_microseconds; elem.thread_name = getThreadName(); elem.thread_id = msg_ext.thread_id; diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.cpp b/src/Parsers/ASTAlterNamedCollectionQuery.cpp index 7e95147ad75..6363a7306bd 100644 --- a/src/Parsers/ASTAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ASTAlterNamedCollectionQuery.cpp @@ -15,6 +15,8 @@ ASTPtr ASTAlterNamedCollectionQuery::clone() const void ASTAlterNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "Alter NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); if (!changes.empty()) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 61e5903fad5..955320c318c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes String ASTAlterCommand::getID(char delim) const { - return String("AlterCommand") + delim + typeToString(type); + return fmt::format("AlterCommand{}{}", delim, type); } ASTPtr ASTAlterCommand::clone() const @@ -80,53 +80,6 @@ ASTPtr ASTAlterCommand::clone() const return res; } -const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) -{ - switch (type) - { - case ADD_COLUMN: return "ADD_COLUMN"; - case DROP_COLUMN: return "DROP_COLUMN"; - case MODIFY_COLUMN: return "MODIFY_COLUMN"; - case COMMENT_COLUMN: return "COMMENT_COLUMN"; - case RENAME_COLUMN: return "RENAME_COLUMN"; - case MATERIALIZE_COLUMN: return "MATERIALIZE_COLUMN"; - case MODIFY_ORDER_BY: return "MODIFY_ORDER_BY"; - case MODIFY_SAMPLE_BY: return "MODIFY_SAMPLE_BY"; - case MODIFY_TTL: return "MODIFY_TTL"; - case MATERIALIZE_TTL: return "MATERIALIZE_TTL"; - case MODIFY_SETTING: return "MODIFY_SETTING"; - case RESET_SETTING: return "RESET_SETTING"; - case MODIFY_QUERY: return "MODIFY_QUERY"; - case REMOVE_TTL: return "REMOVE_TTL"; - case REMOVE_SAMPLE_BY: return "REMOVE_SAMPLE_BY"; - case ADD_INDEX: return "ADD_INDEX"; - case DROP_INDEX: return "DROP_INDEX"; - case MATERIALIZE_INDEX: return "MATERIALIZE_INDEX"; - case ADD_CONSTRAINT: return "ADD_CONSTRAINT"; - case DROP_CONSTRAINT: return "DROP_CONSTRAINT"; - case ADD_PROJECTION: return "ADD_PROJECTION"; - case DROP_PROJECTION: return "DROP_PROJECTION"; - case MATERIALIZE_PROJECTION: return "MATERIALIZE_PROJECTION"; - case DROP_PARTITION: return "DROP_PARTITION"; - case DROP_DETACHED_PARTITION: return "DROP_DETACHED_PARTITION"; - case ATTACH_PARTITION: return "ATTACH_PARTITION"; - case MOVE_PARTITION: return "MOVE_PARTITION"; - case REPLACE_PARTITION: return "REPLACE_PARTITION"; - case FETCH_PARTITION: return "FETCH_PARTITION"; - case FREEZE_PARTITION: return "FREEZE_PARTITION"; - case FREEZE_ALL: return "FREEZE_ALL"; - case UNFREEZE_PARTITION: return "UNFREEZE_PARTITION"; - case UNFREEZE_ALL: return "UNFREEZE_ALL"; - case DELETE: return "DELETE"; - case UPDATE: return "UPDATE"; - case NO_TYPE: return "NO_TYPE"; - case LIVE_VIEW_REFRESH: return "LIVE_VIEW_REFRESH"; - case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; - case MODIFY_COMMENT: return "MODIFY_COMMENT"; - } - UNREACHABLE(); -} - void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { if (type == ASTAlterCommand::ADD_COLUMN) diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 1400113fa9c..30cf0cac4ce 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -208,8 +208,6 @@ public: ASTPtr clone() const override; - static const char * typeToString(Type type); - protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 0d580d5bb21..17d4b9a9d58 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -38,7 +38,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str; - settings.ostr << "CREATE INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); + settings.ostr << "CREATE " << (unique ? "UNIQUE " : "") << "INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); index_name->formatImpl(settings, state, frame); settings.ostr << " ON "; diff --git a/src/Parsers/ASTCreateIndexQuery.h b/src/Parsers/ASTCreateIndexQuery.h index 424a0e493d9..b7577f2634e 100644 --- a/src/Parsers/ASTCreateIndexQuery.h +++ b/src/Parsers/ASTCreateIndexQuery.h @@ -20,6 +20,7 @@ public: ASTPtr index_decl; bool if_not_exists{false}; + bool unique{false}; String getID(char delim) const override; diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.cpp b/src/Parsers/ASTCreateNamedCollectionQuery.cpp index 97e83541f05..45ef8565148 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.cpp +++ b/src/Parsers/ASTCreateNamedCollectionQuery.cpp @@ -18,6 +18,8 @@ ASTPtr ASTCreateNamedCollectionQuery::clone() const void ASTCreateNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE NAMED COLLECTION "; + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.h b/src/Parsers/ASTCreateNamedCollectionQuery.h index 303d1901931..785aeb781b4 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.h +++ b/src/Parsers/ASTCreateNamedCollectionQuery.h @@ -13,6 +13,7 @@ class ASTCreateNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster public: std::string collection_name; SettingsChanges changes; + bool if_not_exists = false; String getID(char) const override { return "CreateNamedCollectionQuery"; } diff --git a/src/Parsers/ASTDropNamedCollectionQuery.cpp b/src/Parsers/ASTDropNamedCollectionQuery.cpp index 3b8568cfd70..e317681d33d 100644 --- a/src/Parsers/ASTDropNamedCollectionQuery.cpp +++ b/src/Parsers/ASTDropNamedCollectionQuery.cpp @@ -13,6 +13,8 @@ ASTPtr ASTDropNamedCollectionQuery::clone() const void ASTDropNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); } diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 6ed241f75ab..1fbf5e12695 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -14,6 +14,7 @@ class ASTIndexDeclaration : public IAST public: static const auto DEFAULT_INDEX_GRANULARITY = 1uz; static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz; + static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; String name; IAST * expr; diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 5c76f6f33bf..4a9a3d8df5b 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -93,7 +93,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const { - /// 100 - just arbitrary value. + /// 100 - just arbitrary value. constexpr auto min_elements_for_hashing = 100; /// Special case for very large arrays. Instead of listing all elements, will use hash of them. @@ -118,9 +118,31 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const } } +/// Use different rules for escaping backslashes and quotes +class FieldVisitorToStringPostgreSQL : public StaticVisitor +{ +public: + template + String operator() (const T & x) const { return visitor(x); } + +private: + FieldVisitorToString visitor; +}; + +template<> +String FieldVisitorToStringPostgreSQL::operator() (const String & x) const +{ + WriteBufferFromOwnString wb; + writeQuotedStringPostgreSQL(x, wb); + return wb.str(); +} + void ASTLiteral::formatImplWithoutAlias(const FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << applyVisitor(FieldVisitorToString(), value); + if (settings.literal_escaping_style == LiteralEscapingStyle::Regular) + settings.ostr << applyVisitor(FieldVisitorToString(), value); + else + settings.ostr << applyVisitor(FieldVisitorToStringPostgreSQL(), value); } } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 0cfdc3762a1..90d9ede7337 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -73,11 +73,11 @@ void ASTProjectionSelectQuery::formatImpl(const FormatSettings & s, FormatState if (orderBy()) { - /// Let's convert the ASTFunction into ASTExpressionList, which generates consistent format + /// Let's convert tuple ASTFunction into ASTExpressionList, which generates consistent format /// between GROUP BY and ORDER BY projection definition. s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); ASTPtr order_by; - if (auto * func = orderBy()->as()) + if (auto * func = orderBy()->as(); func && func->name == "tuple") order_by = func->arguments; else { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 9550752b1f3..48b4ae3c38d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -5,7 +5,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index 76ad812e713..1b7b76fe231 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -69,9 +69,9 @@ void ASTSetQuery::appendColumnName(WriteBuffer & ostr) const Hash hash = getTreeHash(); writeCString("__settings_", ostr); - writeText(hash.first, ostr); + writeText(hash.low64, ostr); ostr.write('_'); - writeText(hash.second, ostr); + writeText(hash.high64, ostr); } } diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 7b4e052bc86..03ae96fa288 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,10 +11,20 @@ ASTPtr ASTShowTablesQuery::clone() const { auto res = std::make_shared(*this); res->children.clear(); + if (from) + res->set(res->from, from->clone()); + cloneOutputOptions(*res); return res; } +String ASTShowTablesQuery::getFrom() const +{ + String name; + tryGetIdentifierNameInto(from, name); + return name; +} + void ASTShowTablesQuery::formatLike(const FormatSettings & settings) const { if (!like.empty()) @@ -72,9 +83,11 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); + if (from) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + from->formatImpl(settings, state, frame); + } formatLike(settings); diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 2878df54bcc..2fd4c3dac96 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -24,8 +24,9 @@ public: bool caches = false; bool full = false; + IAST * from; + String cluster_str; - String from; String like; bool not_like = false; @@ -38,6 +39,8 @@ public: ASTPtr clone() const override; QueryKind getQueryKind() const override { return QueryKind::Show; } + String getFrom() const; + protected: void formatLike(const FormatSettings & settings) const; void formatLimit(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; diff --git a/src/Parsers/ASTSubquery.cpp b/src/Parsers/ASTSubquery.cpp index a3408f12330..92adad666ed 100644 --- a/src/Parsers/ASTSubquery.cpp +++ b/src/Parsers/ASTSubquery.cpp @@ -19,11 +19,9 @@ void ASTSubquery::appendColumnNameImpl(WriteBuffer & ostr) const } else { - Hash hash = getTreeHash(); + const auto hash = getTreeHash(); writeCString("__subquery_", ostr); - writeText(hash.first, ostr); - ostr.write('_'); - writeText(hash.second, ostr); + writeString(toString(hash), ostr); } } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 754eb825dcc..9be01719d8c 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -162,7 +162,9 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES || type == Type::STOP_DISTRIBUTED_SENDS - || type == Type::START_DISTRIBUTED_SENDS) + || type == Type::START_DISTRIBUTED_SENDS + || type == Type::STOP_PULLING_REPLICATION_LOG + || type == Type::START_PULLING_REPLICATION_LOG) { if (table) print_database_table(); @@ -202,7 +204,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, } else if (type == Type::SUSPEND) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " << (settings.hilite ? hilite_none : "") << seconds << (settings.hilite ? hilite_keyword : "") << " SECOND" << (settings.hilite ? hilite_none : ""); @@ -210,7 +212,15 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) + { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; + if (!key_to_drop.empty()) + { + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + } + } } else if (type == Type::UNFREEZE) { @@ -222,12 +232,50 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, } else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type) - << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(server_type.type) << (settings.hilite ? hilite_none : ""); - if (server_type.type == ServerType::CUSTOM) + if (server_type.type == ServerType::Type::CUSTOM) { - settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name); + settings.ostr << " " << quoteString(server_type.custom_name); + } + + bool comma = false; + + if (!server_type.exclude_types.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " EXCEPT" << (settings.hilite ? hilite_none : ""); + + for (auto cur_type : server_type.exclude_types) + { + if (cur_type == ServerType::Type::CUSTOM) + continue; + + if (comma) + settings.ostr << ","; + else + comma = true; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(cur_type) << (settings.hilite ? hilite_none : ""); + } + + if (server_type.exclude_types.contains(ServerType::Type::CUSTOM)) + { + for (const auto & cur_name : server_type.exclude_custom_names) + { + if (comma) + settings.ostr << ","; + else + comma = true; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " + << ServerType::serverTypeToString(ServerType::Type::CUSTOM) << (settings.hilite ? hilite_none : ""); + + settings.ostr << " " << quoteString(cur_name); + } + } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebaf357c0ab..9d15e7cacee 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -80,6 +80,8 @@ public: UNFREEZE, ENABLE_FAILPOINT, DISABLE_FAILPOINT, + STOP_PULLING_REPLICATION_LOG, + START_PULLING_REPLICATION_LOG, END }; @@ -108,6 +110,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; diff --git a/src/Parsers/ASTUseQuery.h b/src/Parsers/ASTUseQuery.h index f767a6bbdb7..873a316e653 100644 --- a/src/Parsers/ASTUseQuery.h +++ b/src/Parsers/ASTUseQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,19 +15,34 @@ namespace DB class ASTUseQuery : public IAST { public: - String database; + IAST * database; + + String getDatabase() const + { + String name; + tryGetIdentifierNameInto(database, name); + return name; + } /** Get the text that identifies this element. */ - String getID(char delim) const override { return "UseQuery" + (delim + database); } + String getID(char delim) const override { return "UseQuery" + (delim + getDatabase()); } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + if (database) + res->set(res->database, database->clone()); + return res; + } QueryKind getQueryKind() const override { return QueryKind::Use; } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : ""); + database->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp index 56abedf5235..091b62b0a9f 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp @@ -170,6 +170,12 @@ void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? hilite_none : ""); formatNames(names, settings); + + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.h b/src/Parsers/Access/ASTCreateQuotaQuery.h index f5eb59800ec..aecbbb00f9a 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.h +++ b/src/Parsers/Access/ASTCreateQuotaQuery.h @@ -38,6 +38,7 @@ public: Strings names; String new_name; std::optional key_type; + String storage_name; struct Limits { diff --git a/src/Parsers/Access/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp index d624b9a9157..eeeb34c97e4 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.cpp +++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp @@ -71,6 +71,12 @@ void ASTCreateRoleQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateRoleQuery.h b/src/Parsers/Access/ASTCreateRoleQuery.h index 42d1a4031b6..4e465553164 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.h +++ b/src/Parsers/Access/ASTCreateRoleQuery.h @@ -28,6 +28,7 @@ public: Strings names; String new_name; + String storage_name; std::shared_ptr settings; diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp index ca888be2cfe..e95959703ee 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp @@ -168,6 +168,11 @@ void ASTCreateRowPolicyQuery::formatImpl(const FormatSettings & settings, Format settings.ostr << " "; names->format(settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); assert(names->cluster.empty()); diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.h b/src/Parsers/Access/ASTCreateRowPolicyQuery.h index 32d98fab822..3f2418e7287 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.h @@ -35,6 +35,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; std::shared_ptr names; String new_short_name; diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index 56ddef433ef..3379486d789 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -81,6 +81,12 @@ void ASTCreateSettingsProfileQuery::formatImpl(const FormatSettings & format, Fo format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h index c1a64998f29..be01aae1e26 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h @@ -28,6 +28,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; Strings names; String new_name; diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0611545adf0..b7290c16013 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -208,6 +208,11 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << " "; names->format(format); + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (new_name) diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 62ddbfd0040..06940e67d91 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -45,6 +45,7 @@ public: std::shared_ptr names; std::optional new_name; + String storage_name; std::shared_ptr auth_data; diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp index 88f2d7bce63..bcd7105d0e9 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp @@ -53,6 +53,11 @@ void ASTDropAccessEntityQuery::formatImpl(const FormatSettings & settings, Forma else formatNames(names, settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); } diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.h b/src/Parsers/Access/ASTDropAccessEntityQuery.h index f3a065c50df..32f4a8f8047 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.h +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.h @@ -21,6 +21,7 @@ public: AccessEntityType type; bool if_exists = false; Strings names; + String storage_name; std::shared_ptr row_policy_names; String getID(char) const override; diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..285f07854c0 --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ','; + settings.ostr << ' ' << backQuoteIfNeed(name); + } + } +} + +String ASTMoveAccessEntityQuery::getID(char) const +{ + return String("MOVE ") + toString(type) + " query"; +} + +ASTPtr ASTMoveAccessEntityQuery::clone() const +{ + auto res = std::make_shared(*this); + + if (row_policy_names) + res->row_policy_names = std::static_pointer_cast(row_policy_names->clone()); + + return res; +} + +void ASTMoveAccessEntityQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "MOVE " << AccessEntityTypeInfo::get(type).name + << (settings.hilite ? hilite_none : ""); + + if (type == AccessEntityType::ROW_POLICY) + { + settings.ostr << " "; + row_policy_names->format(settings); + } + else + formatNames(names, settings); + + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " TO " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(storage_name); + + formatOnCluster(settings); +} + +void ASTMoveAccessEntityQuery::replaceEmptyDatabase(const String & current_database) const +{ + if (row_policy_names) + row_policy_names->replaceEmptyDatabase(current_database); +} +} diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.h b/src/Parsers/Access/ASTMoveAccessEntityQuery.h new file mode 100644 index 00000000000..aa2b3b0f98c --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTRowPolicyNames; + +/** MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] [ON [database.]table [,...]] TO storage_name + */ +class ASTMoveAccessEntityQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + AccessEntityType type; + Strings names; + std::shared_ptr row_policy_names; + + String storage_name; + + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabase(const String & current_database) const; + + QueryKind getQueryKind() const override { return QueryKind::Move; } +}; +} diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index 2330ba7771c..bc3512e3fc0 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -108,7 +109,8 @@ namespace }); } - template || std::is_same_v>> + template + requires std::same_as || std::same_as T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) @@ -288,6 +290,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe std::optional key_type; std::vector all_limits; String cluster; + String storage_name; while (true) { @@ -310,6 +313,9 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -332,6 +338,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->key_type = key_type; query->all_limits = std::move(all_limits); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp index da9749958ee..99a97f6901d 100644 --- a/src/Parsers/Access/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -91,6 +92,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -110,6 +112,9 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -125,6 +130,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->names = std::move(names); query->new_name = std::move(new_name); query->settings = std::move(settings); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index 2c25fc14e7d..f9e9466e35d 100644 --- a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -245,6 +246,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & String new_short_name; std::optional is_restrictive; std::vector> filters; + String storage_name; while (true) { @@ -271,6 +273,9 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -294,6 +299,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->is_restrictive = is_restrictive; query->filters = std::move(filters); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index c58a3035dc6..fcb6ebd8806 100644 --- a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -111,6 +112,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -130,6 +132,9 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -152,6 +157,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec query->new_name = std::move(new_name); query->settings = std::move(settings); query->to_roles = std::move(to_roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 0344fb99c04..577b6772ab2 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -414,6 +415,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::shared_ptr grantees; std::shared_ptr default_database; String cluster; + String storage_name; while (true) { @@ -480,6 +482,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -514,6 +519,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->settings = std::move(settings); query->grantees = std::move(grantees); query->default_database = std::move(default_database); + query->storage_name = std::move(storage_name); if (query->auth_data) query->children.push_back(query->auth_data); diff --git a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp index d21164bc1a2..7fd34a6f8e4 100644 --- a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -53,6 +54,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & Strings names; std::shared_ptr row_policy_names; + String storage_name; String cluster; if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) @@ -76,6 +78,9 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + parseAccessStorageName(pos, expected, storage_name); + if (cluster.empty()) parseOnCluster(pos, expected, cluster); @@ -87,6 +92,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->cluster = std::move(cluster); query->names = std::move(names); query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..798e200bfb3 --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, AccessEntityType & type) + { + for (auto i : collections::range(AccessEntityType::MAX)) + { + const auto & type_info = AccessEntityTypeInfo::get(i); + if (ParserKeyword{type_info.name}.ignore(pos, expected) + || (!type_info.alias.empty() && ParserKeyword{type_info.alias}.ignore(pos, expected))) + { + type = i; + return true; + } + } + return false; + } + + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } +} + + +bool ParserMoveAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"MOVE"}.ignore(pos, expected)) + return false; + + AccessEntityType type; + if (!parseEntityType(pos, expected, type)) + return false; + + Strings names; + std::shared_ptr row_policy_names; + String storage_name; + String cluster; + + if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) + { + if (!parseUserNames(pos, expected, names)) + return false; + } + else if (type == AccessEntityType::ROW_POLICY) + { + ParserRowPolicyNames parser; + ASTPtr ast; + parser.allowOnCluster(); + if (!parser.parse(pos, ast, expected)) + return false; + row_policy_names = typeid_cast>(ast); + cluster = std::exchange(row_policy_names->cluster, ""); + } + else + { + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) + return false; + } + + if (!ParserKeyword{"TO"}.ignore(pos, expected) || !parseAccessStorageName(pos, expected, storage_name)) + return false; + + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + + auto query = std::make_shared(); + node = query; + + query->type = type; + query->cluster = std::move(cluster); + query->names = std::move(names); + query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); + + return true; +} +} diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.h b/src/Parsers/Access/ParserMoveAccessEntityQuery.h new file mode 100644 index 00000000000..3accec883dd --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] [ON [database.]table [,...]] TO storage_name + */ +class ParserMoveAccessEntityQuery : public IParserBase +{ +protected: + const char * getName() const override { return "MOVE access entity query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/Access/parseUserName.h b/src/Parsers/Access/parseUserName.h index fbad78e7bce..10c548d8826 100644 --- a/src/Parsers/Access/parseUserName.h +++ b/src/Parsers/Access/parseUserName.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 0149526da79..587908eb49c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -243,6 +243,38 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } +bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::StringLiteral) + return false; + + ReadBufferFromMemory in(pos->begin, pos->size()); + String s; + + if (!tryReadQuotedStringInto(s, in)) + { + expected.add(pos, "string literal"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal"); + return false; + } + + if (s.empty()) + { + expected.add(pos, "non-empty string literal"); + return false; + } + + node = std::make_shared(s); + ++pos; + return true; +} + + bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index f33f2d99f71..2d06b26c6dc 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -34,6 +34,19 @@ protected: }; +/** An identifier for tables written as string literal, for example, 'mytable.avro' + */ +class ParserTableAsStringLiteralIdentifier : public IParserBase +{ +public: + explicit ParserTableAsStringLiteralIdentifier() {} + +protected: + const char * getName() const override { return "string literal table identifier"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** An identifier, possibly containing a dot, for example, x_yz123 or `something special` or Hits.EventTime, * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' */ diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index bf4d6fc9dec..a494a528cd2 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -118,9 +118,7 @@ IAST::Hash IAST::getTreeHash() const { SipHash hash_state; updateTreeHash(hash_state); - IAST::Hash res; - hash_state.get128(res); - return res; + return getSipHash128AsPair(hash_state); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index d217876459f..812fd082476 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ public: /** Get hash code, identifying this element and its subtree. */ - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; Hash getTreeHash() const; void updateTreeHash(SipHash & hash_state) const; virtual void updateTreeHashImpl(SipHash & hash_state) const; @@ -197,6 +198,7 @@ public: IdentifierQuotingStyle identifier_quoting_style; bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. + LiteralEscapingStyle literal_escaping_style; explicit FormatSettings( WriteBuffer & ostr_, @@ -204,7 +206,8 @@ public: bool hilite_ = false, bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, - bool show_secrets_ = true) + bool show_secrets_ = true, + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -212,6 +215,7 @@ public: , identifier_quoting_style(identifier_quoting_style_) , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') + , literal_escaping_style(literal_escaping_style_) { } @@ -223,6 +227,7 @@ public: , identifier_quoting_style(other.identifier_quoting_style) , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) + , literal_escaping_style(other.literal_escaping_style) { } @@ -302,6 +307,7 @@ public: Alter, Grant, Revoke, + Move, System, Set, Use, diff --git a/src/Parsers/LiteralEscapingStyle.h b/src/Parsers/LiteralEscapingStyle.h new file mode 100644 index 00000000000..10d4d84a85d --- /dev/null +++ b/src/Parsers/LiteralEscapingStyle.h @@ -0,0 +1,14 @@ +#pragma once + + +namespace DB +{ + +/// Method to escape single quotes. +enum class LiteralEscapingStyle +{ + Regular, /// Escape backslashes with backslash (\\) and quotes with backslash (\') + PostgreSQL, /// Do not escape backslashes (\), escape quotes with quote ('') +}; + +} diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.cpp b/src/Parsers/ParserAlterNamedCollectionQuery.cpp index 9108747ad82..8fb84f86246 100644 --- a/src/Parsers/ParserAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ParserAlterNamedCollectionQuery.cpp @@ -13,8 +13,9 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod { ParserKeyword s_alter("ALTER"); ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_delete("DELETE"); - ParserIdentifier name_p; ParserSetQuery set_p; ParserToken s_comma(TokenType::Comma); @@ -32,10 +33,13 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod if (!s_collection.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index d2ae7f972b7..81954e3c247 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -66,6 +66,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected { if (index->type && index->type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; + else if (index->type && index->type->name == "usearch") + index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } @@ -80,6 +82,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect node = query; ParserKeyword s_create("CREATE"); + ParserKeyword s_unique("UNIQUE"); ParserKeyword s_index("INDEX"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); @@ -91,10 +94,14 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect String cluster_str; bool if_not_exists = false; + bool unique = false; if (!s_create.ignore(pos, expected)) return false; + if (s_unique.ignore(pos, expected)) + unique = true; + if (!s_index.ignore(pos, expected)) return false; @@ -131,6 +138,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect query->children.push_back(index_decl); query->if_not_exists = if_not_exists; + query->unique = unique; query->cluster = cluster_str; if (query->database) diff --git a/src/Parsers/ParserCreateIndexQuery.h b/src/Parsers/ParserCreateIndexQuery.h index 3cb91cd03c6..701586d6e11 100644 --- a/src/Parsers/ParserCreateIndexQuery.h +++ b/src/Parsers/ParserCreateIndexQuery.h @@ -6,7 +6,7 @@ namespace DB { /** Query like this: - * CREATE INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value + * CREATE [UNIQUE] INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value */ class ParserCreateIndexQuery : public IParserBase diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 415d3321eb5..9e40e031c51 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -148,6 +148,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { if (index->type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; + else if (index->type->name == "usearch") + index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } @@ -502,7 +504,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_table("TABLE"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_from("FROM"); ParserKeyword s_on("ON"); ParserToken s_dot(TokenType::Dot); @@ -740,7 +742,7 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_live("LIVE"); @@ -878,7 +880,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_window("WINDOW"); @@ -1015,12 +1017,17 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->if_not_exists = if_not_exists; query->is_window_view = true; - StorageID table_id = table->as()->getTableId(); - query->setDatabase(table_id.database_name); - query->setTable(table_id.table_name); - query->uuid = table_id.uuid; + auto * table_id = table->as(); + query->database = table_id->getDatabase(); + query->table = table_id->getTable(); + query->uuid = table_id->uuid; query->cluster = cluster_str; + if (query->database) + query->children.push_back(query->database); + if (query->table) + query->children.push_back(query->table); + if (to_table) query->to_table_id = to_table->as()->getTableId(); @@ -1265,7 +1272,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + ParserCompoundIdentifier to_table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ false); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_materialized("MATERIALIZED"); @@ -1421,15 +1429,17 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); - ParserKeyword s_attach("ATTACH"); ParserKeyword s_named_collection("NAMED COLLECTION"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_as("AS"); - - ParserToken s_comma(TokenType::Comma); ParserIdentifier name_p; + ParserToken s_comma(TokenType::Comma); + + String cluster_str; + bool if_not_exists = false; ASTPtr collection_name; - String cluster_str; if (!s_create.ignore(pos, expected)) return false; @@ -1437,10 +1447,13 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!s_named_collection.ignore(pos, expected)) return false; + if (s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; @@ -1465,7 +1478,9 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec auto query = std::make_shared(); tryGetIdentifierNameInto(collection_name, query->collection_name); + query->if_not_exists = if_not_exists; query->changes = changes; + query->cluster = std::move(cluster_str); node = query; return true; @@ -1480,7 +1495,7 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E ParserKeyword s_dictionary("DICTIONARY"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); - ParserCompoundIdentifier dict_name_p(true, true); + ParserCompoundIdentifier dict_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserToken s_left_paren(TokenType::OpeningRoundBracket); ParserToken s_right_paren(TokenType::ClosingRoundBracket); ParserToken s_dot(TokenType::Dot); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 09935e2b608..0a98923436c 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -548,6 +548,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// CREATE NAMED COLLECTION name [ON CLUSTER cluster] class ParserCreateNamedCollectionQuery : public IParserBase { protected: diff --git a/src/Parsers/ParserDropNamedCollectionQuery.cpp b/src/Parsers/ParserDropNamedCollectionQuery.cpp index 1ea8aa6d75d..b0b010b5ef6 100644 --- a/src/Parsers/ParserDropNamedCollectionQuery.cpp +++ b/src/Parsers/ParserDropNamedCollectionQuery.cpp @@ -12,6 +12,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node ParserKeyword s_drop("DROP"); ParserKeyword s_collection("NAMED COLLECTION"); ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserIdentifier name_p; String cluster_str; @@ -31,7 +32,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 77e7b58e6b1..7ed69940bed 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -54,6 +55,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCreateIndexQuery create_index_p; ParserDropIndexQuery drop_index_p; ParserDropAccessEntityQuery drop_access_entity_p; + ParserMoveAccessEntityQuery move_access_entity_p; ParserGrantQuery grant_p; ParserSetRoleQuery set_role_p; ParserExternalDDLQuery external_ddl_p; @@ -80,6 +82,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || create_index_p.parse(pos, node, expected) || drop_index_p.parse(pos, node, expected) || drop_access_entity_p.parse(pos, node, expected) + || move_access_entity_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) || transaction_control_p.parse(pos, node, expected) diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 727d037112f..4df74c2dd82 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 3540a6d3fc8..e3728eb2cd6 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -36,7 +36,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_where("WHERE"); ParserKeyword s_limit("LIMIT"); ParserStringLiteral like_p; - ParserIdentifier name_p; + ParserIdentifier name_p(true); ParserExpressionWithOptionalAlias exp_elem(false); ASTPtr like; @@ -174,7 +174,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - tryGetIdentifierNameInto(database, query->from); + query->set(query->from, database); if (like) query->like = like->as().value.safeGet(); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 9aff0e8879e..ac3aa41048c 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -379,6 +379,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -405,7 +407,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) + { res->filesystem_cache_name = ast->as()->value.safeGet(); + if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) + { + res->key_to_drop = ast->as()->name(); + if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) + res->offset_to_drop = ast->as()->value.safeGet(); + } + } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; @@ -448,32 +458,71 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & if (!parseQueryWithOnCluster(res, pos, expected)) return false; - ServerType::Type current_type = ServerType::Type::END; - std::string current_custom_name; - - for (const auto & type : magic_enum::enum_values()) + auto parse_server_type = [&](ServerType::Type & type, std::string & custom_name) -> bool { - if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected)) + type = ServerType::Type::END; + custom_name = ""; + + for (const auto & cur_type : magic_enum::enum_values()) { - current_type = type; - break; + if (ParserKeyword{ServerType::serverTypeToString(cur_type)}.ignore(pos, expected)) + { + type = cur_type; + break; + } + } + + if (type == ServerType::Type::END) + return false; + + if (type == ServerType::CUSTOM) + { + ASTPtr ast; + + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + + custom_name = ast->as().value.get(); + } + + return true; + }; + + ServerType::Type base_type; + std::string base_custom_name; + + ServerType::Types exclude_type; + ServerType::CustomNames exclude_custom_names; + + if (!parse_server_type(base_type, base_custom_name)) + return false; + + if (ParserKeyword{"EXCEPT"}.ignore(pos, expected)) + { + if (base_type != ServerType::Type::QUERIES_ALL && + base_type != ServerType::Type::QUERIES_DEFAULT && + base_type != ServerType::Type::QUERIES_CUSTOM) + return false; + + ServerType::Type current_type; + std::string current_custom_name; + + while (true) + { + if (!exclude_type.empty() && !ParserToken(TokenType::Comma).ignore(pos, expected)) + break; + + if (!parse_server_type(current_type, current_custom_name)) + return false; + + exclude_type.insert(current_type); + + if (current_type == ServerType::Type::CUSTOM) + exclude_custom_names.insert(current_custom_name); } } - if (current_type == ServerType::Type::END) - return false; - - if (current_type == ServerType::CUSTOM) - { - ASTPtr ast; - - if (!ParserStringLiteral{}.parse(pos, ast, expected)) - return false; - - current_custom_name = ast->as().value.get(); - } - - res->server_type = ServerType(current_type, current_custom_name); + res->server_type = ServerType(base_type, base_custom_name, exclude_type, exclude_custom_names); break; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 08b6f77fafa..b3ae6ca0bb9 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -24,6 +24,8 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword).parse(pos, res->subquery, expected) && !ParserWithOptionalAlias(std::make_unique(false, true), allow_alias_without_as_keyword).parse(pos, res->table_function, expected) && !ParserWithOptionalAlias(std::make_unique(true, true), allow_alias_without_as_keyword) + .parse(pos, res->database_and_table_name, expected) + && !ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword) .parse(pos, res->database_and_table_name, expected)) return false; diff --git a/src/Parsers/ParserUseQuery.cpp b/src/Parsers/ParserUseQuery.cpp index 40511d8c142..350e573281c 100644 --- a/src/Parsers/ParserUseQuery.cpp +++ b/src/Parsers/ParserUseQuery.cpp @@ -11,7 +11,7 @@ namespace DB bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_use("USE"); - ParserIdentifier name_p; + ParserIdentifier name_p{/*allow_query_parameter*/ true}; if (!s_use.ignore(pos, expected)) return false; @@ -21,7 +21,7 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto query = std::make_shared(); - tryGetIdentifierNameInto(database, query->database); + query->set(query->database, database); node = query; return true; diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index 1282c12cce6..b8679cc3b96 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -31,7 +31,7 @@ private: void visitQueryParameter(const ASTQueryParameter & query_parameter) { - query_parameters[query_parameter.name]= query_parameter.type; + query_parameters[query_parameter.name] = query_parameter.type; } }; diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index e411574bd65..261f234081c 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -4,7 +4,7 @@ clickhouse_add_executable(lexer lexer.cpp ${SRCS}) target_link_libraries(lexer PRIVATE clickhouse_parsers) clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") -target_link_libraries(select_parser PRIVATE clickhouse_parsers) +target_link_libraries(select_parser PRIVATE dbms) clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp") -target_link_libraries(create_parser PRIVATE clickhouse_parsers) +target_link_libraries(create_parser PRIVATE dbms) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 9f6c22f90f3..7cce495dfb8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1047,7 +1047,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_) + SelectQueryOptions & select_query_options_) : query_tree(query_tree_) , select_query_options(select_query_options_) , planner_context(buildPlannerContext(query_tree, select_query_options, std::make_shared())) @@ -1055,7 +1055,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) @@ -1064,7 +1064,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index 783a07f6e99..f8d151365cf 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -22,16 +22,16 @@ class Planner public: /// Initialize planner with query tree after analysis phase Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_); + SelectQueryOptions & select_query_options_); /// Initialize planner with query tree after query analysis phase and global planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_); /// Initialize planner with query tree after query analysis phase and planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_); const QueryPlan & getQueryPlan() const @@ -66,7 +66,7 @@ private: void buildPlanForQueryNode(); QueryTreeNodePtr query_tree; - SelectQueryOptions select_query_options; + SelectQueryOptions & select_query_options; PlannerContextPtr planner_context; QueryPlan query_plan; StorageLimitsList storage_limits; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7575828e64d..9b6034cd0de 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -100,8 +100,9 @@ public: if (isNameOfInFunction(function_node.getFunctionName())) { + const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node); + in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; @@ -165,7 +166,7 @@ public: case QueryTreeNodeType::LAMBDA: { auto lambda_hash = node->getTreeHash(); - result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second); + result = "__lambda_" + toString(lambda_hash); break; } default: @@ -628,8 +629,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma auto in_first_argument = function_node.getArguments().getNodes().at(0); auto in_second_argument = function_node.getArguments().getNodes().at(1); - //auto set_key = planner_context->createSetKey(in_second_argument); - DataTypes set_element_types; auto in_second_argument_node_type = in_second_argument->getNodeType(); @@ -665,7 +664,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma PreparedSets::toString(set_key, set_element_types)); ColumnWithTypeAndName column; - column.name = planner_context->createSetKey(in_second_argument); + column.name = planner_context->createSetKey(in_first_argument->getResultType(), in_second_argument); column.type = std::make_shared(); bool set_is_created = set->get() != nullptr; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 3c75d4fbea8..0fde034b87a 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -112,10 +113,24 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } -PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set_source_node) +PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { - auto set_source_hash = set_source_node->getTreeHash(); - return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); + const auto set_source_hash = set_source_node->getTreeHash(); + if (set_source_node->as()) + { + /* We need to hash the type of the left operand because we can build different sets for different types. + * (It's done for performance reasons. It's cheaper to convert a small set of values from literal to the type of the left operand.) + * + * For example in expression `(a :: Decimal(9, 1) IN (1.0, 2.5)) AND (b :: Decimal(9, 0) IN (1, 2.5))` + * we need to build two different sets: + * - `{1, 2.5} :: Set(Decimal(9, 1))` for a + * - `{1} :: Set(Decimal(9, 0))` for b (2.5 omitted because bercause it's not representable as Decimal(9, 0)). + */ + return "__set_" + left_operand_type->getName() + '_' + toString(set_source_hash); + } + + /// For other cases we will cast left operand to the type of the set source, so no difference in types. + return "__set_" + toString(set_source_hash); } } diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index aceb313d4b5..44b24d09c5d 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -132,7 +132,7 @@ public: using SetKey = std::string; /// Create set key for set source node - static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); + static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index c118fccded4..f6ce029a295 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -101,7 +101,7 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, } throw Exception(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. To execute this query, it's necessary to have the grant SELECT for at least one column on {}", query_context->getUserName(), storage_id.getFullTableName()); } @@ -113,6 +113,20 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +bool shouldIgnoreQuotaAndLimits(const TableNode & table_node) +{ + const auto & storage_id = table_node.getStorageID(); + if (!storage_id.hasDatabase()) + return false; + if (storage_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) + { + static const boost::container::flat_set tables_ignoring_quota{"quotas", "quota_limits", "quota_usage", "quotas_usage", "one"}; + if (tables_ignoring_quota.count(storage_id.table_name)) + return true; + } + return false; +} + NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) { /** We need to read at least one column to find the number of rows. @@ -828,8 +842,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } else { + SelectQueryOptions analyze_query_options = SelectQueryOptions(from_stage).analyze(); Planner planner(select_query_info.query_tree, - SelectQueryOptions(from_stage).analyze(), + analyze_query_options, select_query_info.planner_context); planner.buildQueryPlanIfNeeded(); @@ -1375,7 +1390,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { @@ -1386,6 +1401,16 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, std::vector table_expressions_outer_scope_columns(table_expressions_stack_size); ColumnIdentifierSet current_outer_scope_columns = outer_scope_columns; + if (is_single_table_expression) + { + auto * table_node = table_expressions_stack[0]->as(); + if (table_node && shouldIgnoreQuotaAndLimits(*table_node)) + { + select_query_options.ignore_quota = true; + select_query_options.ignore_limits = true; + } + } + /// For each table, table function, query, union table expressions prepare before query plan build for (size_t i = 0; i < table_expressions_stack_size; ++i) { diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index acbc96ddae0..9d3b98175d0 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -20,7 +20,7 @@ struct JoinTreeQueryPlan /// Build JOIN TREE query plan for query node JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e495b0967e9..e87930a4d6b 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -635,6 +635,7 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo /// JOIN with JOIN engine. if (auto storage = table_join->getStorageJoin()) { + Names required_column_names; for (const auto & result_column : right_table_expression_header) { const auto * source_column_name = right_table_expression_data.getColumnNameOrNull(result_column.name); @@ -644,8 +645,9 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo fmt::join(storage->getKeyNames(), ", "), result_column.name); table_join->setRename(*source_column_name, result_column.name); + required_column_names.push_back(*source_column_name); } - return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + return storage->getJoinLocked(table_join, planner_context->getQueryContext(), required_column_names); } /** JOIN with constant. diff --git a/src/Processors/DelayedPortsProcessor.cpp b/src/Processors/DelayedPortsProcessor.cpp index 24023529bca..f3edc91b162 100644 --- a/src/Processors/DelayedPortsProcessor.cpp +++ b/src/Processors/DelayedPortsProcessor.cpp @@ -75,10 +75,13 @@ void DelayedPortsProcessor::finishPair(PortsPair & pair) pair.input_port->close(); pair.is_finished = true; - ++num_finished_pairs; + ++num_finished_inputs; if (pair.output_port) ++num_finished_outputs; + + if (!pair.is_delayed) + ++num_finished_main_inputs; } } @@ -112,9 +115,15 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair) return true; } + +bool DelayedPortsProcessor::shouldSkipDelayed() const +{ + return num_finished_main_inputs + num_delayed_ports < port_pairs.size(); +} + IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) { - bool skip_delayed = (num_finished_pairs + num_delayed_ports) < port_pairs.size(); + bool skip_delayed = shouldSkipDelayed(); bool need_data = false; if (!are_inputs_initialized && !updated_outputs.empty()) @@ -154,14 +163,14 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in } /// In case if main streams are finished at current iteration, start processing delayed streams. - if (skip_delayed && (num_finished_pairs + num_delayed_ports) >= port_pairs.size()) + if (skip_delayed && !shouldSkipDelayed()) { for (auto & pair : port_pairs) if (pair.is_delayed) need_data = processPair(pair) || need_data; } - if (num_finished_pairs == port_pairs.size()) + if (num_finished_inputs == port_pairs.size()) return Status::Finished; if (need_data) diff --git a/src/Processors/DelayedPortsProcessor.h b/src/Processors/DelayedPortsProcessor.h index 3909d533914..667667bbb91 100644 --- a/src/Processors/DelayedPortsProcessor.h +++ b/src/Processors/DelayedPortsProcessor.h @@ -29,14 +29,16 @@ private: std::vector port_pairs; const size_t num_delayed_ports; - size_t num_finished_pairs = 0; + size_t num_finished_inputs = 0; size_t num_finished_outputs = 0; + size_t num_finished_main_inputs = 0; std::vector output_to_pair; bool are_inputs_initialized = false; bool processPair(PortsPair & pair); void finishPair(PortsPair & pair); + bool shouldSkipDelayed() const; }; } diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp index 781a7736336..40f968621b1 100644 --- a/src/Processors/Executors/PollingQueue.cpp +++ b/src/Processors/Executors/PollingQueue.cpp @@ -74,7 +74,7 @@ PollingQueue::TaskData PollingQueue::wait(std::unique_lock & lock) epoll_event event; event.data.ptr = nullptr; - epoll.getManyReady(1, &event, true); + epoll.getManyReady(1, &event, -1); lock.lock(); diff --git a/src/Processors/Executors/StreamingFormatExecutor.cpp b/src/Processors/Executors/StreamingFormatExecutor.cpp index 2223721439e..46818989032 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.cpp +++ b/src/Processors/Executors/StreamingFormatExecutor.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index 86f892b630d..384224ba1f7 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -10,6 +10,8 @@ namespace DB { +struct SelectQueryInfo; + using ColumnMappingPtr = std::shared_ptr; /** Input format is a source, that reads data from ReadBuffer. @@ -21,9 +23,13 @@ protected: ReadBuffer * in [[maybe_unused]] = nullptr; public: - // ReadBuffer can be nullptr for random-access formats. + /// ReadBuffer can be nullptr for random-access formats. IInputFormat(Block header, ReadBuffer * in_); + /// If the format is used by a SELECT query, this method may be called. + /// The format may use it for filter pushdown. + virtual void setQueryInfo(const SelectQueryInfo &, ContextPtr) {} + /** In some usecase (hello Kafka) we need to read a lot of tiny streams in exactly the same format. * The recreating of parser for each small stream takes too long, so we introduce a method * resetParser() which allow to reset the state of parser to continue reading of diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 0728aecf61f..7395d9b916a 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -86,7 +86,21 @@ void IRowInputFormat::logError() Chunk IRowInputFormat::generate() { if (total_rows == 0) - readPrefix(); + { + try + { + readPrefix(); + } + catch (Exception & e) + { + auto file_name = getFileNameFromReadBuffer(getReadBuffer()); + if (!file_name.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_name)); + + e.addMessage("(while reading header)"); + throw; + } + } const Block & header = getPort().getHeader(); @@ -97,7 +111,6 @@ Chunk IRowInputFormat::generate() size_t num_rows = 0; size_t chunk_start_offset = getDataOffsetMaybeCompressed(getReadBuffer()); - try { RowReadExtension info; diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 1fa520eaaee..15b53c2a499 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -115,21 +115,24 @@ NamesAndTypesList IRowSchemaReader::readSchema() "Cannot read rows to determine the schema, the maximum number of rows (or bytes) to read is set to 0. " "Most likely setting input_format_max_rows_to_read_for_schema_inference or input_format_max_bytes_to_read_for_schema_inference is set to 0"); - DataTypes data_types = readRowAndGetDataTypes(); + auto data_types_maybe = readRowAndGetDataTypes(); /// Check that we read at list one column. - if (data_types.empty()) + if (!data_types_maybe) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); + DataTypes data_types = std::move(*data_types_maybe); + /// If column names weren't set, use default names 'c1', 'c2', ... - if (column_names.empty()) + bool use_default_column_names = column_names.empty(); + if (use_default_column_names) { column_names.reserve(data_types.size()); for (size_t i = 0; i != data_types.size(); ++i) column_names.push_back("c" + std::to_string(i + 1)); } /// If column names were set, check that the number of names match the number of types. - else if (column_names.size() != data_types.size()) + else if (column_names.size() != data_types.size() && !allowVariableNumberOfColumns()) { throw Exception( ErrorCodes::INCORRECT_DATA, @@ -137,6 +140,9 @@ NamesAndTypesList IRowSchemaReader::readSchema() } else { + if (column_names.size() != data_types.size()) + data_types.resize(column_names.size()); + std::unordered_set names_set; for (const auto & name : column_names) { @@ -155,13 +161,39 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (rows_read = 1; rows_read < max_rows_to_read && in.count() < max_bytes_to_read; ++rows_read) { - DataTypes new_data_types = readRowAndGetDataTypes(); - if (new_data_types.empty()) + auto new_data_types_maybe = readRowAndGetDataTypes(); + if (!new_data_types_maybe) /// We reached eof. break; + DataTypes new_data_types = std::move(*new_data_types_maybe); + if (new_data_types.size() != data_types.size()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + { + if (!allowVariableNumberOfColumns()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + + if (use_default_column_names) + { + /// Current row contains new columns, add new default names. + if (new_data_types.size() > data_types.size()) + { + for (size_t i = data_types.size(); i < new_data_types.size(); ++i) + column_names.push_back("c" + std::to_string(i + 1)); + data_types.resize(new_data_types.size()); + } + /// Current row contain less columns than previous rows. + else + { + new_data_types.resize(data_types.size()); + } + } + /// If names were explicitly set, ignore all extra columns. + else + { + new_data_types.resize(column_names.size()); + } + } for (field_index = 0; field_index != data_types.size(); ++field_index) { diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 40702198a57..0cc8b98f05e 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -93,11 +93,13 @@ protected: /// Read one row and determine types of columns in it. /// Return types in the same order in which the values were in the row. /// If it's impossible to determine the type for some column, return nullptr for it. - /// Return empty list if can't read more data. - virtual DataTypes readRowAndGetDataTypes() = 0; + /// Return std::nullopt if can't read more data. + virtual std::optional readRowAndGetDataTypes() = 0; void setColumnNames(const std::vector & names) { column_names = names; } + virtual bool allowVariableNumberOfColumns() const { return false; } + size_t field_index; private: diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 2fadc09e80f..4293407379e 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -143,7 +143,6 @@ void ArrowBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Arrow", - format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns, format_settings.null_as_default, format_settings.arrow.case_insensitive_column_matching); @@ -190,7 +189,6 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatSupportsSubcolumns("Arrow"); factory.markFormatSupportsSubsetOfColumns("Arrow"); factory.registerInputFormat( "ArrowStream", diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5a7306111a5..74d4553a58b 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1032,13 +1032,11 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( ArrowColumnToCHColumn::ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) - , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) , null_as_default(null_as_default_) , case_insensitive_matching(case_insensitive_matching_) @@ -1080,42 +1078,40 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!name_to_column_ptr.contains(search_column_name)) { bool read_from_nested = false; - /// Check if it's a column from nested table. - if (import_nested) + /// Check if it's a subcolumn from some struct. + String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) { - String nested_table_name = Nested::extractTableName(header_column.name); - String search_nested_table_name = nested_table_name; - if (case_insensitive_matching) - boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - if (!nested_tables.contains(search_nested_table_name)) + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) { - NamesAndTypesList nested_columns; - for (const auto & name_and_type : header.getNamesAndTypesList()) - { - if (name_and_type.name.starts_with(nested_table_name + ".")) - nested_columns.push_back(name_and_type); - } - auto nested_table_type = Nested::collect(nested_columns).front().type; + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( - arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; - BlockPtr block_ptr = std::make_shared(cols); - auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); - nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; - } - auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); - if (nested_column) - { - column = *nested_column; - if (case_insensitive_matching) - column.name = header_column.name; - read_from_nested = true; - } + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( + arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; } } + if (!read_from_nested) { if (!allow_missing_columns) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 64ff99c70ac..57f33069e0e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -24,7 +24,6 @@ public: ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); @@ -53,7 +52,6 @@ public: private: const Block & header; const std::string format_name; - bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. bool allow_missing_columns; bool null_as_default; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index c661e6b782d..e7ea64e487c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1258,6 +1258,8 @@ void registerInputFormatAvro(FormatFactory & factory) { return std::make_shared(sample, buf, params, settings); }); + + factory.markFormatSupportsSubsetOfColumns("AvroConfluent"); } void registerAvroSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 244b906549e..52f9571f962 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -284,7 +284,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return true; } -bool CSVFormatReader::allowVariableNumberOfColumns() +bool CSVFormatReader::allowVariableNumberOfColumns() const { return format_settings.csv.allow_variable_number_of_columns; } @@ -410,19 +410,22 @@ CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_t { } -std::pair, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() { if (buf.eof()) return {}; auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV); - return {fields, data_types}; + return std::make_pair(std::move(fields), std::move(data_types)); } -DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl() +std::optional CSVSchemaReader::readRowAndGetDataTypesImpl() { - return std::move(readRowAndGetFieldsAndDataTypes().second); + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 7b1a1fc433d..2444477b184 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -70,7 +70,7 @@ public: void skipPrefixBeforeHeader() override; bool checkForEndOfRow() override; - bool allowVariableNumberOfColumns() override; + bool allowVariableNumberOfColumns() const override; std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } @@ -102,8 +102,10 @@ public: CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypesImpl() override; - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + bool allowVariableNumberOfColumns() const override { return format_settings.csv.allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; PeekableReadBuffer buf; CSVFormatReader reader; diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index c056ee2b4a4..d43367a5b44 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -17,12 +17,12 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowInputFormat(std::move(header_), in_, std::move(params_)) , parser(std::make_shared()) { // Parse the schema and fetch the root object - schema = parser->getMessageSchema(info); + schema = parser->getMessageSchema(info.getSchemaInfo()); const auto & header = getPort().getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); } @@ -106,8 +106,12 @@ void registerInputFormatCapnProto(FormatFactory & factory) "CapnProto", [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "CapnProto", true), settings); + return std::make_shared( + buf, + sample, + std::move(params), + CapnProtoSchemaInfo(settings, "CapnProto", sample, settings.capn_proto.use_autogenerated_schema), + settings); }); factory.markFormatSupportsSubsetOfColumns("CapnProto"); factory.registerFileExtension("capnp", "CapnProto"); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 06e94da123f..480be8b760f 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -24,7 +24,7 @@ class ReadBuffer; class CapnProtoRowInputFormat final : public IRowInputFormat { public: - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowInputFormat"; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 66a7160dd89..8ab19331b11 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -23,14 +23,14 @@ void CapnProtoOutputStream::write(const void * buffer, size_t size) CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, + const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowOutputFormat(header_, out_) , column_names(header_.getNames()) , column_types(header_.getDataTypes()) , output_stream(std::make_unique(out_)) { - schema = schema_parser.getMessageSchema(info); + schema = schema_parser.getMessageSchema(info.getSchemaInfo()); const auto & header = getPort(PortKind::Main).getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); capnp::MallocMessageBuilder message; @@ -52,7 +52,11 @@ void registerOutputFormatCapnProto(FormatFactory & factory) const Block & sample, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + return std::make_shared( + buf, + sample, + CapnProtoSchemaInfo(format_settings, "CapnProto", sample, format_settings.capn_proto.use_autogenerated_schema), + format_settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index dd9dcc6b340..c00dceb498e 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -31,8 +31,8 @@ public: CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, - const FormatSettings & format_settings_); + const CapnProtoSchemaInfo & info, + const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 06efe0a20aa..fe6fb42d0a0 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -402,11 +402,10 @@ size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTP /// Allows distinguish expression in the last column in Values format hash_state.update(salt); - IAST::Hash res128; - hash_state.get128(res128); + const auto res128 = getSipHash128AsPair(hash_state); size_t res = 0; - boost::hash_combine(res, res128.first); - boost::hash_combine(res, res128.second); + boost::hash_combine(res, res128.low64); + boost::hash_combine(res, res128.high64); return res; } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 1e67db79a2c..17cc88425f5 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -139,10 +139,13 @@ void CustomSeparatedFormatReader::skipRowBetweenDelimiter() void CustomSeparatedFormatReader::skipField() { skipSpaces(); - skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + if (format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + readCSVFieldWithTwoPossibleDelimiters(*buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + else + skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); } -bool CustomSeparatedFormatReader::checkEndOfRow() +bool CustomSeparatedFormatReader::checkForEndOfRow() { PeekableReadBufferCheckpoint checkpoint{*buf, true}; @@ -200,12 +203,12 @@ std::vector CustomSeparatedFormatReader::readRowImpl() std::vector values; skipRowStartDelimiter(); - if (columns == 0) + if (columns == 0 || allowVariableNumberOfColumns()) { do { values.push_back(readFieldIntoString(values.empty(), false, true)); - } while (!checkEndOfRow()); + } while (!checkForEndOfRow()); columns = values.size(); } else @@ -230,7 +233,7 @@ void CustomSeparatedFormatReader::skipHeaderRow() skipField(); } - while (!checkEndOfRow()); + while (!checkForEndOfRow()); skipRowEndDelimiter(); } @@ -369,7 +372,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( { } -std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { if (no_more_data || reader.checkForSuffix()) { @@ -385,12 +388,15 @@ std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAn auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); - return {fields, data_types}; + return std::make_pair(std::move(fields), std::move(data_types)); } -DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() +std::optional CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() { - return readRowAndGetFieldsAndDataTypes().second; + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 2acf35bd143..893f06409f6 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -74,7 +74,9 @@ public: std::vector readRowForHeaderDetection() override { return readRowImpl(); } - bool checkEndOfRow(); + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } + bool checkForSuffixImpl(bool check_eof); inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } @@ -109,9 +111,11 @@ public: CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: - DataTypes readRowAndGetDataTypesImpl() override; + bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + std::optional readRowAndGetDataTypesImpl() override; + + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index b91345bebe3..e3583a3dff0 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -112,6 +112,12 @@ bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypeP return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings); } +bool JSONCompactEachRowFormatReader::checkForEndOfRow() +{ + skipWhitespaceIfAny(*in); + return !in->eof() && *in->position() == ']'; +} + bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out) { skipWhitespaceIfAny(*in); @@ -187,7 +193,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( { } -DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() +std::optional JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() { if (first_row) first_row = false; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index bb699f0ca2e..378a41e6471 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -68,6 +68,9 @@ public: std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() const override { return format_settings.json.compact_allow_variable_number_of_columns; } + bool yieldStrings() const { return yield_strings; } private: bool yield_strings; @@ -79,7 +82,9 @@ public: JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypesImpl() override; + bool allowVariableNumberOfColumns() const override { return format_settings.json.compact_allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; void transformFinalTypeIfNeeded(DataTypePtr & type) override; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index eeca14176cc..ac2a0058c70 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -326,8 +326,8 @@ static void insertUUID(IColumn & column, DataTypePtr type, const char * value, s throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName()); ReadBufferFromMemory buf(value, size); UUID uuid; - readBinaryBigEndian(uuid.toUnderType().items[0], buf); - readBinaryBigEndian(uuid.toUnderType().items[1], buf); + readBinaryBigEndian(UUIDHelpers::getHighBytes(uuid), buf); + readBinaryBigEndian(UUIDHelpers::getLowBytes(uuid), buf); assert_cast(column).insertValue(uuid); } @@ -634,7 +634,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) UNREACHABLE(); } -DataTypes MsgPackSchemaReader::readRowAndGetDataTypes() +std::optional MsgPackSchemaReader::readRowAndGetDataTypes() { if (buf.eof()) return {}; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 0b485d3b97c..028ab878ad0 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -91,7 +91,7 @@ public: private: msgpack::object_handle readObject(); DataTypePtr getDataType(const msgpack::object & object); - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; PeekableReadBuffer buf; UInt64 number_of_columns; diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 9c601492217..12bbd35b77b 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -270,8 +270,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr { WriteBufferFromOwnString buf; UUID value = uuid_column.getElement(row_num); - writeBinaryBigEndian(value.toUnderType().items[0], buf); - writeBinaryBigEndian(value.toUnderType().items[1], buf); + writeBinaryBigEndian(UUIDHelpers::getHighBytes(value), buf); + writeBinaryBigEndian(UUIDHelpers::getLowBytes(value), buf); std::string_view uuid_ext = buf.stringView(); packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUIDType)); packer.pack_ext_body(uuid_ext.data(), static_cast(uuid_ext.size())); diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 90dd07bd5a8..6c754f141da 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -422,7 +422,7 @@ NamesAndTypesList MySQLDumpSchemaReader::readSchema() return IRowSchemaReader::readSchema(); } -DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() +std::optional MySQLDumpSchemaReader::readRowAndGetDataTypes() { if (in.eof()) return {}; diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h index c28355054d7..14a73bf83b0 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h @@ -33,7 +33,7 @@ public: private: NamesAndTypesList readSchema() override; - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; String table_name; }; diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp new file mode 100644 index 00000000000..6948bb31e75 --- /dev/null +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -0,0 +1,1019 @@ +#include "NativeORCBlockInputFormat.h" + +#if USE_ORC +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TYPE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + extern const int THERE_IS_NO_COLUMN; + extern const int INCORRECT_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +ORCInputStream::ORCInputStream(SeekableReadBuffer & in_, size_t file_size_) : in(in_), file_size(file_size_) +{ +} + +uint64_t ORCInputStream::getLength() const +{ + return file_size; +} + +uint64_t ORCInputStream::getNaturalReadSize() const +{ + return 128 * 1024; +} + +void ORCInputStream::read(void * buf, uint64_t length, uint64_t offset) +{ + if (offset != static_cast(in.getPosition())) + in.seek(offset, SEEK_SET); + + in.readStrict(reinterpret_cast(buf), length); +} + +std::unique_ptr asORCInputStream(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled) +{ + bool has_file_size = isBufferWithFileSize(in); + auto * seekable_in = dynamic_cast(&in); + + if (has_file_size && seekable_in && settings.seekable_read && seekable_in->checkIfActuallySeekable()) + return std::make_unique(*seekable_in, getFileSizeFromReadBuffer(in)); + + /// Fallback to loading the entire file in memory + return asORCInputStreamLoadIntoMemory(in, is_cancelled); +} + +std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in, std::atomic & is_cancelled) +{ + size_t magic_size = strlen(ORC_MAGIC_BYTES); + std::string file_data(magic_size, '\0'); + + /// Avoid loading the whole file if it doesn't seem to even be in the correct format. + size_t bytes_read = in.read(file_data.data(), magic_size); + if (bytes_read < magic_size || file_data != ORC_MAGIC_BYTES) + throw Exception(ErrorCodes::INCORRECT_DATA, "Not an ORC file"); + + WriteBufferFromString file_buffer(file_data, AppendModeTag{}); + copyData(in, file_buffer, is_cancelled); + file_buffer.finalize(); + + size_t file_size = file_data.size(); + return std::make_unique(std::move(file_data), file_size); +} + +static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_with_unsupported_types, bool & skipped) +{ + assert(orc_type != nullptr); + + const int subtype_count = static_cast(orc_type->getSubtypeCount()); + switch (orc_type->getKind()) + { + case orc::TypeKind::BOOLEAN: + return DataTypeFactory::instance().get("Bool"); + case orc::TypeKind::BYTE: + return std::make_shared(); + case orc::TypeKind::SHORT: + return std::make_shared(); + case orc::TypeKind::INT: + return std::make_shared(); + case orc::TypeKind::LONG: + return std::make_shared(); + case orc::TypeKind::FLOAT: + return std::make_shared(); + case orc::TypeKind::DOUBLE: + return std::make_shared(); + case orc::TypeKind::DATE: + return std::make_shared(); + case orc::TypeKind::TIMESTAMP: + return std::make_shared(9); + case orc::TypeKind::VARCHAR: + case orc::TypeKind::BINARY: + case orc::TypeKind::STRING: + return std::make_shared(); + case orc::TypeKind::CHAR: + return std::make_shared(orc_type->getMaximumLength()); + case orc::TypeKind::DECIMAL: { + UInt64 precision = orc_type->getPrecision(); + UInt64 scale = orc_type->getScale(); + if (precision == 0) + { + // In HIVE 0.11/0.12 precision is set as 0, but means max precision + return createDecimal(38, 6); + } + else + return createDecimal(precision, scale); + } + case orc::TypeKind::LIST: { + if (subtype_count != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc List type {}", orc_type->toString()); + + DataTypePtr nested_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + return std::make_shared(nested_type); + } + case orc::TypeKind::MAP: { + if (subtype_count != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc Map type {}", orc_type->toString()); + + DataTypePtr key_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + return std::make_shared(key_type, value_type); + } + case orc::TypeKind::STRUCT: { + DataTypes nested_types; + Strings nested_names; + nested_types.reserve(subtype_count); + nested_names.reserve(subtype_count); + + for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) + { + auto parsed_type = parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + + nested_types.push_back(parsed_type); + nested_names.push_back(orc_type->getFieldName(i)); + } + return std::make_shared(nested_types, nested_names); + } + default: { + if (skip_columns_with_unsupported_types) + { + skipped = true; + return {}; + } + + throw Exception( + ErrorCodes::UNKNOWN_TYPE, + "Unsupported ORC type '{}'." + "If you want to skip columns with unsupported types, " + "you can enable setting input_format_orc_skip_columns_with_unsupported_types_in_schema_inference", + orc_type->toString()); + } + } +} + + +static void getFileReaderAndSchema( + ReadBuffer & in, + std::unique_ptr & file_reader, + Block & header, + const FormatSettings & format_settings, + std::atomic & is_stopped) +{ + if (is_stopped) + return; + + orc::ReaderOptions options; + auto input_stream = asORCInputStream(in, format_settings, is_stopped); + file_reader = orc::createReader(std::move(input_stream), options); + const auto & schema = file_reader->getType(); + + for (size_t i = 0; i < schema.getSubtypeCount(); ++i) + { + const std::string & name = schema.getFieldName(i); + const orc::Type * orc_type = schema.getSubtype(i); + + bool skipped = false; + DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped); + if (!skipped) + header.insert(ColumnWithTypeAndName{type, name}); + } +} + +NativeORCBlockInputFormat::NativeORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) + : IInputFormat(std::move(header_), &in_), format_settings(format_settings_), skip_stripes(format_settings.orc.skip_stripes) +{ +} + +void NativeORCBlockInputFormat::prepareFileReader() +{ + Block schema; + getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped); + if (is_stopped) + return; + + total_stripes = static_cast(file_reader->getNumberOfStripes()); + current_stripe = -1; + + orc_column_to_ch_column = std::make_unique( + getPort().getHeader(), + format_settings.orc.allow_missing_columns, + format_settings.null_as_default, + format_settings.orc.case_insensitive_column_matching); + + const bool ignore_case = format_settings.orc.case_insensitive_column_matching; + std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); + + for (size_t i = 0; i < schema.columns(); ++i) + { + const auto & name = schema.getByPosition(i).name; + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) + include_indices.push_back(static_cast(i)); + } +} + +bool NativeORCBlockInputFormat::prepareStripeReader() +{ + assert(file_reader); + + ++current_stripe; + for (; current_stripe < total_stripes && skip_stripes.contains(current_stripe); ++current_stripe) + ; + + /// No more stripes to read + if (current_stripe >= total_stripes) + return false; + + current_stripe_info = file_reader->getStripe(current_stripe); + if (!current_stripe_info->getNumberOfRows()) + throw Exception(ErrorCodes::INCORRECT_DATA, "ORC stripe {} has no rows", current_stripe); + + orc::RowReaderOptions row_reader_options; + row_reader_options.include(include_indices); + row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); + stripe_reader = file_reader->createRowReader(row_reader_options); + + if (!batch) + batch = stripe_reader->createRowBatch(format_settings.orc.row_batch_size); + + return true; +} + +Chunk NativeORCBlockInputFormat::generate() +{ + block_missing_values.clear(); + + if (!file_reader) + prepareFileReader(); + + if (!stripe_reader) + { + if (!prepareStripeReader()) + return {}; + } + + if (is_stopped) + return {}; + + while (true) + { + bool ok = stripe_reader->next(*batch); + if (ok) + break; + + /// No more rows to read in current stripe, continue to prepare reading next stripe + if (!prepareStripeReader()) + return {}; + } + + Chunk res; + size_t num_rows = batch->numElements; + const auto & schema = stripe_reader->getSelectedType(); + orc_column_to_ch_column->orcTableToCHChunk(res, &schema, batch.get(), num_rows, &block_missing_values); + + approx_bytes_read_for_chunk = num_rows * current_stripe_info->getLength() / current_stripe_info->getNumberOfRows(); + return res; +} + +void NativeORCBlockInputFormat::resetParser() +{ + IInputFormat::resetParser(); + + file_reader.reset(); + stripe_reader.reset(); + include_indices.clear(); + batch.reset(); + block_missing_values.clear(); +} + +const BlockMissingValues & NativeORCBlockInputFormat::getMissingValues() const +{ + return block_missing_values; +} + +NativeORCSchemaReader::NativeORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) +{ +} + +NamesAndTypesList NativeORCSchemaReader::readSchema() +{ + Block header; + std::unique_ptr file_reader; + std::atomic is_stopped = 0; + getFileReaderAndSchema(in, file_reader, header, format_settings, is_stopped); + + if (format_settings.schema_inference_make_columns_nullable) + return getNamesAndRecursivelyNullableTypes(header); + return header.getNamesAndTypesList(); +} + + +ORCColumnToCHColumn::ORCColumnToCHColumn( + const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) + : header(header_) + , allow_missing_columns(allow_missing_columns_) + , null_as_default(null_as_default_) + , case_insensitive_matching(case_insensitive_matching_) +{ +} + +void ORCColumnToCHColumn::orcTableToCHChunk( + Chunk & res, const orc::Type * schema, const orc::ColumnVectorBatch * table, size_t num_rows, BlockMissingValues * block_missing_values) +{ + const auto * struct_batch = dynamic_cast(table); + if (!struct_batch) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ORC table must be StructVectorBatch but is {}", struct_batch->toString()); + + if (schema->getSubtypeCount() != struct_batch->fields.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "ORC table has {} fields but schema has {}", struct_batch->fields.size(), schema->getSubtypeCount()); + + size_t field_num = struct_batch->fields.size(); + NameToColumnPtr name_to_column_ptr; + for (size_t i = 0; i < field_num; ++i) + { + auto name = schema->getFieldName(i); + const auto * field = struct_batch->fields[i]; + if (!field) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ORC table field {} is null", name); + + if (case_insensitive_matching) + boost::to_lower(name); + + name_to_column_ptr[std::move(name)] = {field, schema->getSubtype(i)}; + } + + orcColumnsToCHChunk(res, name_to_column_ptr, num_rows, block_missing_values); +} + +/// Creates a null bytemap from ORC's not-null bytemap +static ColumnPtr readByteMapFromORCColumn(const orc::ColumnVectorBatch * orc_column) +{ + if (!orc_column->hasNulls) + return ColumnUInt8::create(orc_column->numElements, 0); + + auto nullmap_column = ColumnUInt8::create(); + PaddedPODArray & bytemap_data = assert_cast &>(*nullmap_column).getData(); + bytemap_data.resize(orc_column->numElements); + + for (size_t i = 0; i < orc_column->numElements; ++i) + bytemap_data[i] = 1 - orc_column->notNull[i]; + return nullmap_column; +} + + +static const orc::ColumnVectorBatch * getNestedORCColumn(const orc::ListVectorBatch * orc_column) +{ + return orc_column->elements.get(); +} + +template +static ColumnPtr readOffsetsFromORCListColumn(const BatchType * orc_column) +{ + auto offsets_column = ColumnUInt64::create(); + ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); + offsets_data.reserve(orc_column->numElements); + + for (size_t i = 0; i < orc_column->numElements; ++i) + offsets_data.push_back(orc_column->offsets[i + 1]); + + return offsets_column; +} + +static ColumnWithTypeAndName +readColumnWithBooleanData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_bool_column = dynamic_cast(orc_column); + auto internal_type = DataTypeFactory::instance().get("Bool"); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast &>(*internal_column).getData(); + column_data.reserve(orc_bool_column->numElements); + + for (size_t i = 0; i < orc_bool_column->numElements; ++i) + column_data.push_back(static_cast(orc_bool_column->data[i])); + + return {std::move(internal_column), internal_type, column_name}; +} + +/// Inserts numeric data right into internal column data to reduce an overhead +template > +static ColumnWithTypeAndName +readColumnWithNumericData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = std::make_shared>(); + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_int_column = dynamic_cast(orc_column); + column_data.insert_assume_reserved(orc_int_column->data.data(), orc_int_column->data.data() + orc_int_column->numElements); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +template > +static ColumnWithTypeAndName +readColumnWithNumericDataCast(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = std::make_shared>(); + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_int_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_int_column->numElements; ++i) + column_data.push_back(static_cast(orc_int_column->data[i])); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_chars_t = assert_cast(*internal_column).getChars(); + PaddedPODArray & column_offsets = assert_cast(*internal_column).getOffsets(); + + const auto * orc_str_column = dynamic_cast(orc_column); + size_t reserver_size = 0; + for (size_t i = 0; i < orc_str_column->numElements; ++i) + reserver_size += orc_str_column->length[i] + 1; + column_chars_t.reserve(reserver_size); + column_offsets.reserve(orc_str_column->numElements); + + size_t curr_offset = 0; + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + const auto * buf = orc_str_column->data[i]; + if (buf) + { + size_t buf_size = orc_str_column->length[i]; + column_chars_t.insert_assume_reserved(buf, buf + buf_size); + curr_offset += buf_size; + } + + column_chars_t.push_back(0); + ++curr_offset; + + column_offsets.push_back(curr_offset); + } + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readColumnWithFixedStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name) +{ + size_t fixed_len = orc_type->getMaximumLength(); + auto internal_type = std::make_shared(fixed_len); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_chars_t = assert_cast(*internal_column).getChars(); + column_chars_t.reserve(orc_column->numElements * fixed_len); + + const auto * orc_str_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (orc_str_column->data[i]) + column_chars_t.insert_assume_reserved(orc_str_column->data[i], orc_str_column->data[i] + orc_str_column->length[i]); + else + column_chars_t.resize_fill(column_chars_t.size() + fixed_len); + } + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + + +template > +static ColumnWithTypeAndName readColumnWithDecimalDataCast( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, DataTypePtr internal_type) +{ + using NativeType = typename DecimalType::NativeType; + static_assert(std::is_same_v || std::is_same_v); + + auto internal_column = internal_type->createColumn(); + auto & column_data = static_cast(*internal_column).getData(); + column_data.reserve(orc_column->numElements); + + const auto * orc_decimal_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_decimal_column->numElements; ++i) + { + DecimalType decimal_value; + if constexpr (std::is_same_v) + { + Int128 int128_value; + int128_value.items[0] = orc_decimal_column->values[i].getLowBits(); + int128_value.items[1] = orc_decimal_column->values[i].getHighBits(); + decimal_value.value = static_cast(int128_value); + } + else + decimal_value.value = static_cast(orc_decimal_column->values[i]); + + column_data.push_back(std::move(decimal_value)); + } + + return {std::move(internal_column), internal_type, column_name}; +} + +static ColumnWithTypeAndName +readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name) +{ + const auto * orc_str_column = dynamic_cast(orc_column); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + /// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6. + if (orc_str_column->data[i] && orc_str_column->length[i] != sizeof(IPv6)) + return readColumnWithStringData(orc_column, orc_type, column_name); + } + + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + auto & ipv6_column = assert_cast(*internal_column); + ipv6_column.reserve(orc_str_column->numElements); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (!orc_str_column->data[i]) [[unlikely]] + ipv6_column.insertDefault(); + else + ipv6_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + } + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName +readIPv4ColumnWithInt32Data(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_int_column = dynamic_cast(orc_column); + + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast(*internal_column).getData(); + column_data.reserve(orc_int_column->numElements); + + for (size_t i = 0; i < orc_int_column->numElements; ++i) + column_data.push_back(static_cast(orc_int_column->data[i])); + + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +template +static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, const DataTypePtr & column_type) +{ + const auto * orc_str_column = dynamic_cast(orc_column); + + auto internal_column = column_type->createColumn(); + auto & integer_column = assert_cast(*internal_column); + integer_column.reserve(orc_str_column->numElements); + + for (size_t i = 0; i < orc_str_column->numElements; ++i) + { + if (!orc_str_column->data[i]) [[unlikely]] + integer_column.insertDefault(); + else + { + if (sizeof(typename ColumnType::ValueType) != orc_str_column->length[i]) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "ValueType size {} of column {} is not equal to size of binary data {}", + sizeof(typename ColumnType::ValueType), + integer_column.getName(), + orc_str_column->length[i]); + + integer_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + } + } + return {std::move(internal_column), column_type, column_name}; +} + +static ColumnWithTypeAndName readColumnWithDateData( + const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name, const DataTypePtr & type_hint) +{ + DataTypePtr internal_type; + bool check_date_range = false; + /// Make result type Date32 when requested type is actually Date32 or when we use schema inference + if (!type_hint || (type_hint && isDate32(*type_hint))) + { + internal_type = std::make_shared(); + check_date_range = true; + } + else + { + internal_type = std::make_shared(); + } + + const auto * orc_int_column = dynamic_cast(orc_column); + auto internal_column = internal_type->createColumn(); + PaddedPODArray & column_data = assert_cast &>(*internal_column).getData(); + column_data.reserve(orc_int_column->numElements); + + for (size_t i = 0; i < orc_int_column->numElements; ++i) + { + Int32 days_num = static_cast(orc_int_column->data[i]); + if (check_date_range && (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH)) + throw Exception( + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column \"{}\" exceeds the range of type Date32", + days_num, + column_name); + + column_data.push_back(days_num); + } + + return {std::move(internal_column), internal_type, column_name}; +} + +static ColumnWithTypeAndName +readColumnWithTimestampData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name) +{ + const auto * orc_ts_column = dynamic_cast(orc_column); + + auto internal_type = std::make_shared(9); + auto internal_column = internal_type->createColumn(); + auto & column_data = assert_cast(*internal_column).getData(); + column_data.reserve(orc_ts_column->numElements); + + constexpr Int64 multiplier = 1e9L; + for (size_t i = 0; i < orc_ts_column->numElements; ++i) + { + Decimal64 decimal64; + decimal64.value = orc_ts_column->data[i] * multiplier + orc_ts_column->nanoseconds[i]; + column_data.emplace_back(std::move(decimal64)); + } + return {std::move(internal_column), std::move(internal_type), column_name}; +} + +static ColumnWithTypeAndName readColumnFromORCColumn( + const orc::ColumnVectorBatch * orc_column, + const orc::Type * orc_type, + const std::string & column_name, + bool inside_nullable, + DataTypePtr type_hint = nullptr) +{ + bool skipped = false; + + if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable())) + && (orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP && orc_type->getKind() != orc::STRUCT)) + { + DataTypePtr nested_type_hint; + if (type_hint) + nested_type_hint = removeNullable(type_hint); + + auto nested_column = readColumnFromORCColumn(orc_column, orc_type, column_name, true, nested_type_hint); + + auto nullmap_column = readByteMapFromORCColumn(orc_column); + auto nullable_type = std::make_shared(std::move(nested_column.type)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); + return {std::move(nullable_column), std::move(nullable_type), column_name}; + } + + switch (orc_type->getKind()) + { + case orc::STRING: + case orc::BINARY: + case orc::VARCHAR: { + if (type_hint) + { + switch (type_hint->getTypeId()) + { + case TypeIndex::IPv6: + return readIPv6ColumnFromBinaryData(orc_column, orc_type, column_name); + /// ORC format outputs big integers as binary column, because there is no fixed binary in ORC. + case TypeIndex::Int128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::Int256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + /// ORC doesn't support Decimal256 as separate type. We read and write it as binary data. + case TypeIndex::Decimal256: + return readColumnWithBigNumberFromBinaryData>( + orc_column, orc_type, column_name, type_hint); + default:; + } + } + return readColumnWithStringData(orc_column, orc_type, column_name); + } + case orc::CHAR: { + if (type_hint) + { + switch (type_hint->getTypeId()) + { + case TypeIndex::Int128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt128: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::Int256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + case TypeIndex::UInt256: + return readColumnWithBigNumberFromBinaryData(orc_column, orc_type, column_name, type_hint); + default:; + } + } + return readColumnWithFixedStringData(orc_column, orc_type, column_name); + } + case orc::BOOLEAN: + return readColumnWithBooleanData(orc_column, orc_type, column_name); + case orc::BYTE: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::SHORT: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::INT: { + /// ORC format doesn't have unsigned integers and we output IPv4 as Int32. + /// We should allow to read it back from Int32. + if (type_hint && isIPv4(type_hint)) + return readIPv4ColumnWithInt32Data(orc_column, orc_type, column_name); + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + } + case orc::LONG: + return readColumnWithNumericData(orc_column, orc_type, column_name); + case orc::FLOAT: + return readColumnWithNumericDataCast(orc_column, orc_type, column_name); + case orc::DOUBLE: + return readColumnWithNumericData(orc_column, orc_type, column_name); + case orc::DATE: + return readColumnWithDateData(orc_column, orc_type, column_name, type_hint); + case orc::TIMESTAMP: + return readColumnWithTimestampData(orc_column, orc_type, column_name); + case orc::DECIMAL: { + auto interal_type = parseORCType(orc_type, false, skipped); + + auto precision = orc_type->getPrecision(); + if (precision == 0) + precision = 38; + + if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast(orc_column, orc_type, column_name, interal_type); + else if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast(orc_column, orc_type, column_name, interal_type); + else if (precision <= DecimalUtils::max_precision) + return readColumnWithDecimalDataCast( + orc_column, orc_type, column_name, interal_type); + else + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Decimal precision {} in ORC type {} is out of bound", + precision, + orc_type->toString()); + } + case orc::MAP: { + DataTypePtr key_type_hint; + DataTypePtr value_type_hint; + if (type_hint) + { + const auto * map_type_hint = typeid_cast(type_hint.get()); + if (map_type_hint) + { + key_type_hint = map_type_hint->getKeyType(); + value_type_hint = map_type_hint->getValueType(); + } + } + + const auto * orc_map_column = dynamic_cast(orc_column); + const auto * orc_key_column = orc_map_column->keys.get(); + const auto * orc_value_column = orc_map_column->elements.get(); + const auto * orc_key_type = orc_type->getSubtype(0); + const auto * orc_value_type = orc_type->getSubtype(1); + + auto key_column = readColumnFromORCColumn(orc_key_column, orc_key_type, "key", false, key_type_hint); + if (key_type_hint && !key_type_hint->equals(*key_column.type)) + { + /// Cast key column to target type, because it can happen + /// that parsed type cannot be ClickHouse Map key type. + key_column.column = castColumn(key_column, key_type_hint); + key_column.type = key_type_hint; + } + + auto value_column = readColumnFromORCColumn(orc_value_column, orc_value_type, "value", false, value_type_hint); + if (skipped) + return {}; + + if (value_type_hint && !value_type_hint->equals(*value_column.type)) + { + /// Cast value column to target type, because it can happen + /// that parsed type cannot be ClickHouse Map value type. + value_column.column = castColumn(value_column, value_type_hint); + value_column.type = value_type_hint; + } + + auto offsets_column = readOffsetsFromORCListColumn(orc_map_column); + auto map_column = ColumnMap::create(key_column.column, value_column.column, offsets_column); + auto map_type = std::make_shared(key_column.type, value_column.type); + return {std::move(map_column), std::move(map_type), column_name}; + } + case orc::LIST: { + DataTypePtr nested_type_hint; + if (type_hint) + { + const auto * array_type_hint = typeid_cast(type_hint.get()); + if (array_type_hint) + nested_type_hint = array_type_hint->getNestedType(); + } + + const auto * orc_list_column = dynamic_cast(orc_column); + const auto * orc_nested_column = getNestedORCColumn(orc_list_column); + const auto * orc_nested_type = orc_type->getSubtype(0); + auto nested_column = readColumnFromORCColumn(orc_nested_column, orc_nested_type, column_name, false, nested_type_hint); + + auto offsets_column = readOffsetsFromORCListColumn(orc_list_column); + auto array_column = ColumnArray::create(nested_column.column, offsets_column); + auto array_type = std::make_shared(nested_column.type); + return {std::move(array_column), std::move(array_type), column_name}; + } + case orc::STRUCT: { + Columns tuple_elements; + DataTypes tuple_types; + std::vector tuple_names; + const auto * tuple_type_hint = type_hint ? typeid_cast(type_hint.get()) : nullptr; + + const auto * orc_struct_column = dynamic_cast(orc_column); + for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i) + { + const auto & field_name = orc_type->getFieldName(i); + + DataTypePtr nested_type_hint; + if (tuple_type_hint) + { + if (tuple_type_hint->haveExplicitNames()) + { + auto pos = tuple_type_hint->tryGetPositionByName(field_name); + if (pos) + nested_type_hint = tuple_type_hint->getElement(*pos); + } + else if (size_t(i) < tuple_type_hint->getElements().size()) + nested_type_hint = tuple_type_hint->getElement(i); + } + + const auto * nested_orc_column = orc_struct_column->fields[i]; + const auto * nested_orc_type = orc_type->getSubtype(i); + auto element = readColumnFromORCColumn(nested_orc_column, nested_orc_type, field_name, false, nested_type_hint); + + tuple_elements.emplace_back(std::move(element.column)); + tuple_types.emplace_back(std::move(element.type)); + tuple_names.emplace_back(std::move(element.name)); + } + + auto tuple_column = ColumnTuple::create(std::move(tuple_elements)); + auto tuple_type = std::make_shared(std::move(tuple_types), std::move(tuple_names)); + return {std::move(tuple_column), std::move(tuple_type), column_name}; + } + default: + throw Exception( + ErrorCodes::UNKNOWN_TYPE, "Unsupported ORC type {} while reading column {}.", orc_type->toString(), column_name); + } +} + +void ORCColumnToCHColumn::orcColumnsToCHChunk( + Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values) +{ + Columns columns_list; + columns_list.reserve(header.columns()); + std::unordered_map>> nested_tables; + for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + { + const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + + auto search_column_name = header_column.name; + if (case_insensitive_matching) + boost::to_lower(search_column_name); + + ColumnWithTypeAndName column; + if (!name_to_column_ptr.contains(search_column_name)) + { + bool read_from_nested = false; + + /// Check if it's a column from nested table. + String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) + { + if (!nested_tables.contains(search_nested_table_name)) + { + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) + { + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; + + auto orc_column_with_type = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromORCColumn( + orc_column_with_type.first, orc_column_with_type.second, nested_table_name, false, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; + } + } + + if (!read_from_nested) + { + if (!allow_missing_columns) + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; + else + { + column.name = header_column.name; + column.type = header_column.type; + column.column = header_column.column->cloneResized(num_rows); + columns_list.push_back(std::move(column.column)); + if (block_missing_values) + block_missing_values->setBits(column_i, num_rows); + continue; + } + } + } + else + { + auto orc_column_with_type = name_to_column_ptr[search_column_name]; + column = readColumnFromORCColumn( + orc_column_with_type.first, orc_column_with_type.second, header_column.name, false, header_column.type); + } + + if (null_as_default) + insertNullAsDefaultIfNeeded(column, header_column, column_i, block_missing_values); + + try + { + column.column = castColumn(column, header_column.type); + } + catch (Exception & e) + { + e.addMessage(fmt::format( + "while converting column {} from type {} to type {}", + backQuote(header_column.name), + column.type->getName(), + header_column.type->getName())); + throw; + } + + column.type = header_column.type; + columns_list.push_back(std::move(column.column)); + } + + res.setColumns(columns_list, num_rows); +} + +} + +#endif diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h new file mode 100644 index 00000000000..c54eb0520bc --- /dev/null +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h @@ -0,0 +1,129 @@ +#pragma once +#include "config.h" + +#if USE_ORC +# include +# include +# include +# include +# include + +namespace DB +{ + +class ORCInputStream : public orc::InputStream +{ +public: + ORCInputStream(SeekableReadBuffer & in_, size_t file_size_); + + uint64_t getLength() const override; + uint64_t getNaturalReadSize() const override; + void read(void * buf, uint64_t length, uint64_t offset) override; + const std::string & getName() const override { return name; } + +protected: + SeekableReadBuffer & in; + size_t file_size; + std::string name = "ORCInputStream"; +}; + +class ORCInputStreamFromString : public ReadBufferFromOwnString, public ORCInputStream +{ +public: + template + ORCInputStreamFromString(S && s_, size_t file_size_) + : ReadBufferFromOwnString(std::forward(s_)), ORCInputStream(dynamic_cast(*this), file_size_) + { + } +}; + +std::unique_ptr asORCInputStream(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled); + +// Reads the whole file into a memory buffer, owned by the returned RandomAccessFile. +std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in, std::atomic & is_cancelled); + + +class ORCColumnToCHColumn; +class NativeORCBlockInputFormat : public IInputFormat +{ +public: + NativeORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_); + + String getName() const override { return "ORCBlockInputFormat"; } + + void resetParser() override; + + const BlockMissingValues & getMissingValues() const override; + + size_t getApproxBytesReadForChunk() const override { return approx_bytes_read_for_chunk; } + +protected: + Chunk generate() override; + + void onCancel() override { is_stopped = 1; } + +private: + void prepareFileReader(); + bool prepareStripeReader(); + + std::unique_ptr file_reader; + std::unique_ptr stripe_reader; + std::unique_ptr orc_column_to_ch_column; + std::unique_ptr batch; + + // indices of columns to read from ORC file + std::list include_indices; + + BlockMissingValues block_missing_values; + size_t approx_bytes_read_for_chunk; + + const FormatSettings format_settings; + const std::unordered_set & skip_stripes; + + int total_stripes = 0; + int current_stripe = -1; + std::unique_ptr current_stripe_info; + + std::atomic is_stopped{0}; +}; + +class NativeORCSchemaReader : public ISchemaReader +{ +public: + NativeORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); + + NamesAndTypesList readSchema() override; + +private: + const FormatSettings format_settings; +}; + +class ORCColumnToCHColumn +{ +public: + using ORCColumnPtr = const orc::ColumnVectorBatch *; + using ORCTypePtr = const orc::Type *; + using ORCColumnWithType = std::pair; + using NameToColumnPtr = std::unordered_map; + + ORCColumnToCHColumn(const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); + + void orcTableToCHChunk( + Chunk & res, + const orc::Type * schema, + const orc::ColumnVectorBatch * table, + size_t num_rows, + BlockMissingValues * block_missing_values = nullptr); + + void orcColumnsToCHChunk( + Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); + +private: + const Block & header; + /// If false, throw exception if some columns in header not exists in arrow table. + bool allow_missing_columns; + bool null_as_default; + bool case_insensitive_matching; +}; +} +#endif diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index 43294355f2f..7c0428834e0 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -3,9 +3,6 @@ #include #include #include - - -#include #include @@ -19,7 +16,7 @@ ODBCDriver2BlockOutputFormat::ODBCDriver2BlockOutputFormat( static void writeODBCString(WriteBuffer & out, const std::string & str) { - writeIntBinary(Int32(str.size()), out); + writeBinaryLittleEndian(Int32(str.size()), out); out.write(str.data(), str.size()); } @@ -33,7 +30,7 @@ void ODBCDriver2BlockOutputFormat::writeRow(const Columns & columns, size_t row_ if (column->isNullAt(row_idx)) { - writeIntBinary(Int32(-1), out); + writeBinaryLittleEndian(Int32(-1), out); } else { @@ -72,11 +69,11 @@ void ODBCDriver2BlockOutputFormat::writePrefix() const size_t columns = header.columns(); /// Number of header rows. - writeIntBinary(Int32(2), out); + writeBinaryLittleEndian(Int32(2), out); /// Names of columns. /// Number of columns + 1 for first name column. - writeIntBinary(Int32(columns + 1), out); + writeBinaryLittleEndian(Int32(columns + 1), out); writeODBCString(out, "name"); for (size_t i = 0; i < columns; ++i) { @@ -85,7 +82,7 @@ void ODBCDriver2BlockOutputFormat::writePrefix() } /// Types of columns. - writeIntBinary(Int32(columns + 1), out); + writeBinaryLittleEndian(Int32(columns + 1), out); writeODBCString(out, "type"); for (size_t i = 0; i < columns; ++i) { diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 016f07731d5..37b660bc8e0 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,16 +1,17 @@ #include "ORCBlockInputFormat.h" -#include -#if USE_ORC -#include -#include -#include -#include -#include -#include "ArrowBufferedStreams.h" -#include "ArrowColumnToCHColumn.h" -#include "ArrowFieldIndexUtil.h" -#include +#if USE_ORC +# include +# include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" +# include "ArrowColumnToCHColumn.h" +# include "ArrowFieldIndexUtil.h" +# include "NativeORCBlockInputFormat.h" namespace DB { @@ -125,16 +126,12 @@ void ORCBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "ORC", - format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::unordered_set nested_table_names; - if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); - + std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); for (int i = 0; i < schema->num_fields(); ++i) { const auto & name = schema->field(i)->name(); @@ -158,20 +155,24 @@ NamesAndTypesList ORCSchemaReader::readSchema() *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); if (format_settings.schema_inference_make_columns_nullable) return getNamesAndRecursivelyNullableTypes(header); - return header.getNamesAndTypesList();} + return header.getNamesAndTypesList(); +} + void registerInputFormatORC(FormatFactory & factory) { factory.registerInputFormat( - "ORC", - [](ReadBuffer &buf, - const Block &sample, - const RowInputFormatParams &, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, settings); - }); - factory.markFormatSupportsSubcolumns("ORC"); + "ORC", + [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) + { + InputFormatPtr res; + if (settings.orc.use_fast_decoder) + res = std::make_shared(buf, sample, settings); + else + res = std::make_shared(buf, sample, settings); + + return res; + }); factory.markFormatSupportsSubsetOfColumns("ORC"); } @@ -181,7 +182,13 @@ void registerORCSchemaReader(FormatFactory & factory) "ORC", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings); + SchemaReaderPtr res; + if (settings.orc.use_fast_decoder) + res = std::make_shared(buf, settings); + else + res = std::make_shared(buf, settings); + + return res; } ); diff --git a/src/Processors/Formats/Impl/OneFormat.cpp b/src/Processors/Formats/Impl/OneFormat.cpp new file mode 100644 index 00000000000..4a9c8caebf3 --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +OneInputFormat::OneInputFormat(const Block & header, ReadBuffer & in_) : IInputFormat(header, &in_) +{ + if (header.columns() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type UInt8 but the number of columns is {}", + header.columns()); + + if (!WhichDataType(header.getByPosition(0).type).isUInt8()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type String but the column type is {}", + header.getByPosition(0).type->getName()); +} + +Chunk OneInputFormat::generate() +{ + if (done) + return {}; + + done = true; + auto column = ColumnUInt8::create(); + column->insertDefault(); + return Chunk(Columns{std::move(column)}, 1); +} + +void registerInputFormatOne(FormatFactory & factory) +{ + factory.registerInputFormat("One", []( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams &, + const FormatSettings &) + { + return std::make_shared(sample, buf); + }); +} + +void registerOneSchemaReader(FormatFactory & factory) +{ + factory.registerExternalSchemaReader("One", [](const FormatSettings &) + { + return std::make_shared(); + }); +} + +} diff --git a/src/Processors/Formats/Impl/OneFormat.h b/src/Processors/Formats/Impl/OneFormat.h new file mode 100644 index 00000000000..f73b2dab66a --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class OneInputFormat final : public IInputFormat +{ +public: + OneInputFormat(const Block & header, ReadBuffer & in_); + + String getName() const override { return "One"; } + +protected: + Chunk generate() override; + +private: + bool done = false; +}; + +class OneSchemaReader: public IExternalSchemaReader +{ +public: + NamesAndTypesList readSchema() override + { + return {{"dummy", std::make_shared()}}; + } +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index bc4c9ca3b72..9b51ca0c295 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -279,6 +279,8 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin auto decimal = [&](Int32 bytes, UInt32 precision, UInt32 scale) { + /// Currently we encode all decimals as byte arrays, even though Decimal32 and Decimal64 + /// could be INT32 and INT64 instead. There doesn't seem to be much difference. state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); schema.__set_type_length(bytes); @@ -335,32 +337,42 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin case TypeIndex::DateTime64: { - std::optional converted; - std::optional unit; - switch (assert_cast(*type).getScale()) + parq::ConvertedType::type converted; + parq::TimeUnit unit; + const auto & dt = assert_cast(*type); + UInt32 scale = dt.getScale(); + UInt32 converted_scale; + if (scale <= 3) { - case 3: - converted = parq::ConvertedType::TIMESTAMP_MILLIS; - unit.emplace().__set_MILLIS({}); - break; - case 6: - converted = parq::ConvertedType::TIMESTAMP_MICROS; - unit.emplace().__set_MICROS({}); - break; - case 9: - unit.emplace().__set_NANOS({}); - break; + converted = parq::ConvertedType::TIMESTAMP_MILLIS; + unit.__set_MILLIS({}); + converted_scale = 3; + } + else if (scale <= 6) + { + converted = parq::ConvertedType::TIMESTAMP_MICROS; + unit.__set_MICROS({}); + converted_scale = 6; + } + else if (scale <= 9) + { + unit.__set_NANOS({}); + converted_scale = 9; + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected DateTime64 scale: {}", scale); } - std::optional t; - if (unit) - { - parq::TimestampType tt; - tt.__set_isAdjustedToUTC(true); - tt.__set_unit(*unit); - t.emplace().__set_TIMESTAMP(tt); - } + parq::TimestampType tt; + /// (Shouldn't we check the DateTime64's timezone parameter here? No, the actual number + /// in DateTime64 column is always in UTC, regardless of the timezone parameter.) + tt.__set_isAdjustedToUTC(true); + tt.__set_unit(unit); + parq::LogicalType t; + t.__set_TIMESTAMP(tt); types(T::INT64, converted, t); + state.datetime64_multiplier = DataTypeDateTime64::getScaleMultiplier(converted_scale - scale); break; } diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 5ebf2be76d2..1d0b72ba5ec 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -37,8 +37,10 @@ namespace template struct StatisticsNumeric { - T min = std::numeric_limits::max(); - T max = std::numeric_limits::min(); + T min = std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() : std::numeric_limits::max(); + T max = std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); void add(SourceType x) { @@ -254,6 +256,28 @@ struct ConverterNumeric } }; +struct ConverterDateTime64WithMultiplier +{ + using Statistics = StatisticsNumeric; + + using Col = ColumnDecimal; + const Col & column; + Int64 multiplier; + PODArray buf; + + ConverterDateTime64WithMultiplier(const ColumnPtr & c, Int64 multiplier_) : column(assert_cast(*c)), multiplier(multiplier_) {} + + const Int64 * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + /// Not checking overflow because DateTime64 values should already be in the range where + /// they fit in Int64 at any allowed scale (i.e. up to nanoseconds). + buf[i] = column.getData()[offset + i].value * multiplier; + return buf.data(); + } +}; + struct ConverterString { using Statistics = StatisticsStringRef; @@ -786,9 +810,14 @@ void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & option break; case TypeIndex::DateTime64: - writeColumnImpl( - s, options, out, ConverterNumeric, Int64, Int64>( - s.primitive_column)); + if (s.datetime64_multiplier == 1) + writeColumnImpl( + s, options, out, ConverterNumeric, Int64, Int64>( + s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterDateTime64WithMultiplier( + s.primitive_column, s.datetime64_multiplier)); break; case TypeIndex::IPv4: diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h index 9197eae5384..24733ac276b 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.h +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -42,7 +42,8 @@ struct ColumnChunkWriteState ColumnPtr primitive_column; CompressionMethod compression; // must match what's inside column_chunk - bool is_bool = false; + Int64 datetime64_multiplier = 1; // for converting e.g. seconds to milliseconds + bool is_bool = false; // bool vs UInt8 have the same column type but are encoded differently /// Repetition and definition levels. Produced by prepareColumnForWrite(). /// def is empty iff max_def == 0, which means no arrays or nullables. diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index be9c600f9bd..bf7e035e601 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -14,11 +14,15 @@ #include #include #include +#include #include "ArrowBufferedStreams.h" #include "ArrowColumnToCHColumn.h" #include "ArrowFieldIndexUtil.h" #include #include +#include +#include +#include namespace CurrentMetrics { @@ -33,6 +37,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int CANNOT_READ_ALL_DATA; + extern const int CANNOT_PARSE_NUMBER; } #define THROW_ARROW_NOT_OK(status) \ @@ -42,6 +47,322 @@ namespace ErrorCodes throw Exception::createDeprecated(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ } while (false) +/// Decode min/max value from column chunk statistics. +/// +/// There are two questionable decisions in this implementation: +/// * We parse the value from the encoded byte string instead of casting the parquet::Statistics +/// to parquet::TypedStatistics and taking the value from there. +/// * We dispatch based on the parquet logical+converted+physical type instead of the ClickHouse type. +/// The idea is that this is similar to what we'll have to do when reimplementing Parquet parsing in +/// ClickHouse instead of using Arrow (for speed). So, this is an exercise in parsing Parquet manually. +static std::optional decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr) +{ + using namespace parquet; + + auto decode_integer = [&](bool signed_) -> UInt64 { + size_t size; + switch (physical_type) + { + case parquet::Type::type::BOOLEAN: size = 1; break; + case parquet::Type::type::INT32: size = 4; break; + case parquet::Type::type::INT64: size = 8; break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected physical type for number"); + } + if (data.size() != size) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected size: {}", data.size()); + + UInt64 val = 0; + memcpy(&val, data.data(), size); + + /// Sign-extend. + if (signed_ && size < 8 && (val >> (size * 8 - 1)) != 0) + val |= 0 - (1ul << (size * 8)); + + return val; + }; + + /// Decimal. + do // while (false) + { + Int32 scale; + if (descr.logical_type() && descr.logical_type()->is_decimal()) + scale = assert_cast(*descr.logical_type()).scale(); + else if (descr.converted_type() == ConvertedType::type::DECIMAL) + scale = descr.type_scale(); + else + break; + + size_t size; + bool big_endian = false; + switch (physical_type) + { + case Type::type::BOOLEAN: size = 1; break; + case Type::type::INT32: size = 4; break; + case Type::type::INT64: size = 8; break; + + case Type::type::FIXED_LEN_BYTE_ARRAY: + big_endian = true; + size = data.size(); + break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal physical type"); + } + /// Note that size is not necessarily a power of two. + /// E.g. spark turns 8-byte unsigned integers into 9-byte signed decimals. + if (data.size() != size || size < 1 || size > 32) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal size: {} (actual {})", size, data.size()); + + /// For simplicity, widen all decimals to 256-bit. It should compare correctly with values + /// of different bitness. + Int256 val = 0; + memcpy(&val, data.data(), size); + if (big_endian) + std::reverse(reinterpret_cast(&val), reinterpret_cast(&val) + size); + /// Sign-extend. + if (size < 32 && (val >> (size * 8 - 1)) != 0) + val |= ~((Int256(1) << (size * 8)) - 1); + + return Field(DecimalField(Decimal256(val), static_cast(scale))); + } + while (false); + + /// Timestamp (decimal). + { + Int32 scale = -1; + bool is_timestamp = true; + if (descr.logical_type() && (descr.logical_type()->is_time() || descr.logical_type()->is_timestamp())) + { + LogicalType::TimeUnit::unit unit = descr.logical_type()->is_time() + ? assert_cast(*descr.logical_type()).time_unit() + : assert_cast(*descr.logical_type()).time_unit(); + switch (unit) + { + case LogicalType::TimeUnit::unit::MILLIS: scale = 3; break; + case LogicalType::TimeUnit::unit::MICROS: scale = 6; break; + case LogicalType::TimeUnit::unit::NANOS: scale = 9; break; + default: throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unknown time unit"); + } + } + else switch (descr.converted_type()) + { + case ConvertedType::type::TIME_MILLIS: scale = 3; break; + case ConvertedType::type::TIME_MICROS: scale = 6; break; + case ConvertedType::type::TIMESTAMP_MILLIS: scale = 3; break; + case ConvertedType::type::TIMESTAMP_MICROS: scale = 6; break; + default: is_timestamp = false; + } + + if (is_timestamp) + { + Int64 val = static_cast(decode_integer(/* signed */ true)); + return Field(DecimalField(Decimal64(val), scale)); + } + } + + /// Floats. + + if (physical_type == Type::type::FLOAT) + { + if (data.size() != 4) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected float size"); + Float32 val; + memcpy(&val, data.data(), data.size()); + return Field(val); + } + + if (physical_type == Type::type::DOUBLE) + { + if (data.size() != 8) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected float size"); + Float64 val; + memcpy(&val, data.data(), data.size()); + return Field(val); + } + + /// Strings. + + if (physical_type == Type::type::BYTE_ARRAY || physical_type == Type::type::FIXED_LEN_BYTE_ARRAY) + { + /// Arrow's parquet decoder handles missing min/max values slightly incorrectly. + /// In a parquet file, min and max have separate is_set flags, i.e. one may be missing even + /// if the other is set. Arrow decoder ORs (!) these two flags together into one: HasMinMax(). + /// So, if exactly one of {min, max} is missing, Arrow reports it as empty string, with no + /// indication that it's actually missing. + /// + /// How can exactly one of {min, max} be missing? This happens if one of the two strings + /// exceeds the length limit for stats. Repro: + /// + /// insert into function file('t.parquet') select arrayStringConcat(range(number*1000000)) from numbers(2) settings output_format_parquet_use_custom_encoder=0 + /// select tupleElement(tupleElement(row_groups[1], 'columns')[1], 'statistics') from file('t.parquet', ParquetMetadata) + /// + /// Here the row group contains two strings: one empty, one very long. But the statistics + /// reported by arrow are indistinguishable from statistics if all strings were empty. + /// (Min and max are the last two tuple elements in the output of the second query. Notice + /// how they're empty strings instead of NULLs.) + /// + /// So we have to be conservative and treat empty string as unknown. + /// This is unfortunate because it's probably common for string columns to have lots of empty + /// values, and filter pushdown would probably often be useful in that case. + /// + /// TODO: Remove this workaround either when we implement our own Parquet decoder that + /// doesn't have this bug, or if it's fixed in Arrow. + if (data.empty()) + return std::nullopt; + + return Field(data); + } + + /// This one's deprecated in Parquet. + if (physical_type == Type::type::INT96) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Parquet INT96 type is deprecated and not supported"); + + /// Integers. + + bool signed_ = true; + if (descr.logical_type() && descr.logical_type()->is_int()) + signed_ = assert_cast(*descr.logical_type()).is_signed(); + else + signed_ = descr.converted_type() != ConvertedType::type::UINT_8 && + descr.converted_type() != ConvertedType::type::UINT_16 && + descr.converted_type() != ConvertedType::type::UINT_32 && + descr.converted_type() != ConvertedType::type::UINT_64; + + UInt64 val = decode_integer(signed_); + Field field = signed_ ? Field(static_cast(val)) : Field(val); + return field; +} + +/// Range of values for each column, based on statistics in the Parquet metadata. +/// This is lower/upper bounds, not necessarily exact min and max, e.g. the min/max can be just +/// missing in the metadata. +static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaData & file, int row_group_idx, const Block & header, const FormatSettings & format_settings) +{ + auto column_name_for_lookup = [&](std::string column_name) -> std::string + { + if (format_settings.parquet.case_insensitive_column_matching) + boost::to_lower(column_name); + return column_name; + }; + + std::unique_ptr row_group = file.RowGroup(row_group_idx); + + std::unordered_map> name_to_statistics; + for (int i = 0; i < row_group->num_columns(); ++i) + { + auto c = row_group->ColumnChunk(i); + auto s = c->statistics(); + if (!s) + continue; + + auto path = c->path_in_schema()->ToDotVector(); + if (path.size() != 1) + continue; // compound types not supported + + name_to_statistics.emplace(column_name_for_lookup(path[0]), s); + } + + /// +-----+ + /// / /| + /// +-----+ | + /// | | + + /// | |/ + /// +-----+ + std::vector hyperrectangle(header.columns(), Range::createWholeUniverse()); + + for (size_t idx = 0; idx < header.columns(); ++idx) + { + const std::string & name = header.getByPosition(idx).name; + auto it = name_to_statistics.find(column_name_for_lookup(name)); + if (it == name_to_statistics.end()) + continue; + auto stats = it->second; + + auto default_value = [&]() -> Field + { + DataTypePtr type = header.getByPosition(idx).type; + if (type->lowCardinality()) + type = assert_cast(*type).getDictionaryType(); + if (type->isNullable()) + type = assert_cast(*type).getNestedType(); + return type->getDefault(); + }; + + /// Only primitive fields are supported, not arrays, maps, tuples, or Nested. + /// Arrays, maps, and Nested can't be meaningfully supported because Parquet only has min/max + /// across all *elements* of the array, not min/max array itself. + /// Same limitation for tuples, but maybe it would make sense to have some kind of tuple + /// expansion in KeyCondition to accept ranges per element instead of whole tuple. + + std::optional min; + std::optional max; + if (stats->HasMinMax()) + { + try + { + min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr()); + max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr()); + } + catch (Exception & e) + { + e.addMessage(" (When parsing Parquet statistics for column {}, physical type {}, {}. Please report an issue and use input_format_parquet_filter_push_down = false to work around.)", name, static_cast(stats->physical_type()), stats->descr()->ToString()); + throw; + } + } + + /// In Range, NULL is represented as positive or negative infinity (represented by a special + /// kind of Field, different from floating-point infinities). + + bool always_null = stats->descr()->max_definition_level() != 0 && + stats->HasNullCount() && stats->num_values() == 0; + bool can_be_null = stats->descr()->max_definition_level() != 0 && + (!stats->HasNullCount() || stats->null_count() != 0); + bool null_as_default = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(header.getByPosition(idx).type); + + if (always_null) + { + /// Single-point range containing either the default value of one of the infinities. + if (null_as_default) + hyperrectangle[idx].right = hyperrectangle[idx].left = default_value(); + else + hyperrectangle[idx].right = hyperrectangle[idx].left; + continue; + } + + if (can_be_null) + { + if (null_as_default) + { + /// Make sure the range contains the default value. + Field def = default_value(); + if (min.has_value() && applyVisitor(FieldVisitorAccurateLess(), def, *min)) + min = def; + if (max.has_value() && applyVisitor(FieldVisitorAccurateLess(), *max, def)) + max = def; + } + else + { + /// Make sure the range reaches infinity on at least one side. + if (min.has_value() && max.has_value()) + min.reset(); + } + } + else + { + /// If the column doesn't have nulls, exclude both infinities. + if (!min.has_value()) + hyperrectangle[idx].left_included = false; + if (!max.has_value()) + hyperrectangle[idx].right_included = false; + } + + if (min.has_value()) + hyperrectangle[idx].left = std::move(min.value()); + if (max.has_value()) + hyperrectangle[idx].right = std::move(max.value()); + } + + return hyperrectangle; +} + ParquetBlockInputFormat::ParquetBlockInputFormat( ReadBuffer & buf, const Block & header_, @@ -66,6 +387,16 @@ ParquetBlockInputFormat::~ParquetBlockInputFormat() pool->wait(); } +void ParquetBlockInputFormat::setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) +{ + /// When analyzer is enabled, query_info.filter_asts is missing sets and maybe some type casts, + /// so don't use it. I'm not sure how to support analyzer here: https://github.com/ClickHouse/ClickHouse/issues/53536 + if (format_settings.parquet.filter_push_down && !context->getSettingsRef().allow_experimental_analyzer) + key_condition.emplace(query_info, context, getPort().getHeader().getNames(), + std::make_shared(std::make_shared( + getPort().getHeader().getColumnsWithTypeAndName()))); +} + void ParquetBlockInputFormat::initializeIfNeeded() { if (std::exchange(is_initialized, true)) @@ -84,17 +415,37 @@ void ParquetBlockInputFormat::initializeIfNeeded() std::shared_ptr schema; THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); - row_groups.resize(metadata->num_row_groups()); - ArrowFieldIndexUtil field_util( format_settings.parquet.case_insensitive_column_matching, format_settings.parquet.allow_missing_columns); column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema); + + int num_row_groups = metadata->num_row_groups(); + row_group_batches.reserve(num_row_groups); + + for (int row_group = 0; row_group < num_row_groups; ++row_group) + { + if (skip_row_groups.contains(row_group)) + continue; + + if (key_condition.has_value() && + !key_condition->checkInHyperrectangle( + getHyperrectangleForRowGroup(*metadata, row_group, getPort().getHeader(), format_settings), + getPort().getHeader().getDataTypes()).can_be_true) + continue; + + if (row_group_batches.empty() || row_group_batches.back().total_bytes_compressed >= min_bytes_for_seek) + row_group_batches.emplace_back(); + + row_group_batches.back().row_groups_idxs.push_back(row_group); + row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); + row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); + } } -void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) +void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_batch_idx) { - auto & row_group = row_groups[row_group_idx]; + auto & row_group_batch = row_group_batches[row_group_batch_idx]; parquet::ArrowReaderProperties properties; properties.set_use_threads(false); @@ -140,34 +491,30 @@ void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); builder.properties(properties); // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. - THROW_ARROW_NOT_OK(builder.Build(&row_group.file_reader)); + THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); THROW_ARROW_NOT_OK( - row_group.file_reader->GetRecordBatchReader({static_cast(row_group_idx)}, column_indices, &row_group.record_batch_reader)); + row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); - row_group.arrow_column_to_ch_column = std::make_unique( + row_group_batch.arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, format_settings.parquet.case_insensitive_column_matching); - - row_group.row_group_bytes_uncompressed = metadata->RowGroup(static_cast(row_group_idx))->total_compressed_size(); - row_group.row_group_rows = metadata->RowGroup(static_cast(row_group_idx))->num_rows(); } -void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) +void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx) { chassert(!mutex.try_lock()); - auto & status = row_groups[row_group_idx].status; - chassert(status == RowGroupState::Status::NotStarted || status == RowGroupState::Status::Paused); + auto & status = row_group_batches[row_group_batch_idx].status; + chassert(status == RowGroupBatchState::Status::NotStarted || status == RowGroupBatchState::Status::Paused); - status = RowGroupState::Status::Running; + status = RowGroupBatchState::Status::Running; pool->scheduleOrThrowOnError( - [this, row_group_idx, thread_group = CurrentThread::getGroup()]() + [this, row_group_batch_idx, thread_group = CurrentThread::getGroup()]() { if (thread_group) CurrentThread::attachToGroupIfDetached(thread_group); @@ -177,7 +524,7 @@ void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) { setThreadName("ParquetDecoder"); - threadFunction(row_group_idx); + threadFunction(row_group_batch_idx); } catch (...) { @@ -188,44 +535,44 @@ void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_idx) }); } -void ParquetBlockInputFormat::threadFunction(size_t row_group_idx) +void ParquetBlockInputFormat::threadFunction(size_t row_group_batch_idx) { std::unique_lock lock(mutex); - auto & row_group = row_groups[row_group_idx]; - chassert(row_group.status == RowGroupState::Status::Running); + auto & row_group_batch = row_group_batches[row_group_batch_idx]; + chassert(row_group_batch.status == RowGroupBatchState::Status::Running); while (true) { - if (is_stopped || row_group.num_pending_chunks >= max_pending_chunks_per_row_group) + if (is_stopped || row_group_batch.num_pending_chunks >= max_pending_chunks_per_row_group_batch) { - row_group.status = RowGroupState::Status::Paused; + row_group_batch.status = RowGroupBatchState::Status::Paused; return; } - decodeOneChunk(row_group_idx, lock); + decodeOneChunk(row_group_batch_idx, lock); - if (row_group.status == RowGroupState::Status::Done) + if (row_group_batch.status == RowGroupBatchState::Status::Done) return; } } -void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_lock & lock) +void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::unique_lock & lock) { - auto & row_group = row_groups[row_group_idx]; - chassert(row_group.status != RowGroupState::Status::Done); + auto & row_group_batch = row_group_batches[row_group_batch_idx]; + chassert(row_group_batch.status != RowGroupBatchState::Status::Done); chassert(lock.owns_lock()); SCOPE_EXIT({ chassert(lock.owns_lock() || std::uncaught_exceptions()); }); lock.unlock(); auto end_of_row_group = [&] { - row_group.arrow_column_to_ch_column.reset(); - row_group.record_batch_reader.reset(); - row_group.file_reader.reset(); + row_group_batch.arrow_column_to_ch_column.reset(); + row_group_batch.record_batch_reader.reset(); + row_group_batch.file_reader.reset(); lock.lock(); - row_group.status = RowGroupState::Status::Done; + row_group_batch.status = RowGroupBatchState::Status::Done; // We may be able to schedule more work now, but can't call scheduleMoreWorkIfNeeded() right // here because we're running on the same thread pool, so it'll deadlock if thread limit is @@ -233,23 +580,10 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_l condvar.notify_all(); }; - if (!row_group.record_batch_reader) - { - if (skip_row_groups.contains(static_cast(row_group_idx))) - { - // Pretend that the row group is empty. - // (We could avoid scheduling the row group on a thread in the first place. But the - // skip_row_groups feature is mostly unused, so it's better to be a little inefficient - // than to add a bunch of extra mostly-dead code for this.) - end_of_row_group(); - return; - } + if (!row_group_batch.record_batch_reader) + initializeRowGroupBatchReader(row_group_batch_idx); - initializeRowGroupReader(row_group_idx); - } - - - auto batch = row_group.record_batch_reader->Next(); + auto batch = row_group_batch.record_batch_reader->Next(); if (!batch.ok()) throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); @@ -261,44 +595,44 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_idx, std::unique_l auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group.row_group_bytes_uncompressed) / row_group.row_group_rows * (*tmp_table)->num_rows())); - PendingChunk res = {.chunk_idx = row_group.next_chunk_idx, .row_group_idx = row_group_idx, .approx_original_chunk_size = approx_chunk_original_size}; + size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows())); + PendingChunk res = {.chunk_idx = row_group_batch.next_chunk_idx, .row_group_batch_idx = row_group_batch_idx, .approx_original_chunk_size = approx_chunk_original_size}; /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; - row_group.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); lock.lock(); - ++row_group.next_chunk_idx; - ++row_group.num_pending_chunks; + ++row_group_batch.next_chunk_idx; + ++row_group_batch.num_pending_chunks; pending_chunks.push(std::move(res)); condvar.notify_all(); } -void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row_group_touched) +void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row_group_batch_touched) { - while (row_groups_completed < row_groups.size()) + while (row_group_batches_completed < row_group_batches.size()) { - auto & row_group = row_groups[row_groups_completed]; - if (row_group.status != RowGroupState::Status::Done || row_group.num_pending_chunks != 0) + auto & row_group = row_group_batches[row_group_batches_completed]; + if (row_group.status != RowGroupBatchState::Status::Done || row_group.num_pending_chunks != 0) break; - ++row_groups_completed; + ++row_group_batches_completed; } if (pool) { - while (row_groups_started - row_groups_completed < max_decoding_threads && - row_groups_started < row_groups.size()) - scheduleRowGroup(row_groups_started++); + while (row_group_batches_started - row_group_batches_completed < max_decoding_threads && + row_group_batches_started < row_group_batches.size()) + scheduleRowGroup(row_group_batches_started++); - if (row_group_touched) + if (row_group_batch_touched) { - auto & row_group = row_groups[*row_group_touched]; - if (row_group.status == RowGroupState::Status::Paused && - row_group.num_pending_chunks < max_pending_chunks_per_row_group) - scheduleRowGroup(*row_group_touched); + auto & row_group = row_group_batches[*row_group_batch_touched]; + if (row_group.status == RowGroupBatchState::Status::Paused && + row_group.num_pending_chunks < max_pending_chunks_per_row_group_batch) + scheduleRowGroup(*row_group_batch_touched); } } } @@ -323,30 +657,30 @@ Chunk ParquetBlockInputFormat::generate() if (!pending_chunks.empty() && (!format_settings.parquet.preserve_order || - pending_chunks.top().row_group_idx == row_groups_completed)) + pending_chunks.top().row_group_batch_idx == row_group_batches_completed)) { PendingChunk chunk = std::move(const_cast(pending_chunks.top())); pending_chunks.pop(); - auto & row_group = row_groups[chunk.row_group_idx]; + auto & row_group = row_group_batches[chunk.row_group_batch_idx]; chassert(row_group.num_pending_chunks != 0); chassert(chunk.chunk_idx == row_group.next_chunk_idx - row_group.num_pending_chunks); --row_group.num_pending_chunks; - scheduleMoreWorkIfNeeded(chunk.row_group_idx); + scheduleMoreWorkIfNeeded(chunk.row_group_batch_idx); previous_block_missing_values = std::move(chunk.block_missing_values); previous_approx_bytes_read_for_chunk = chunk.approx_original_chunk_size; return std::move(chunk.chunk); } - if (row_groups_completed == row_groups.size()) + if (row_group_batches_completed == row_group_batches.size()) return {}; if (pool) condvar.wait(lock); else - decodeOneChunk(row_groups_completed, lock); + decodeOneChunk(row_group_batches_completed, lock); } } @@ -359,12 +693,12 @@ void ParquetBlockInputFormat::resetParser() arrow_file.reset(); metadata.reset(); column_indices.clear(); - row_groups.clear(); + row_group_batches.clear(); while (!pending_chunks.empty()) pending_chunks.pop(); - row_groups_completed = 0; + row_group_batches_completed = 0; previous_block_missing_values.clear(); - row_groups_started = 0; + row_group_batches_started = 0; background_exception = nullptr; is_stopped = false; @@ -412,7 +746,7 @@ void registerInputFormatParquet(FormatFactory & factory) size_t /* max_download_threads */, size_t max_parsing_threads) { - size_t min_bytes_for_seek = is_remote_fs ? read_settings.remote_read_min_bytes_for_seek : 8 * 1024; + size_t min_bytes_for_seek = is_remote_fs ? read_settings.remote_read_min_bytes_for_seek : settings.parquet.local_read_min_bytes_for_seek; return std::make_shared( buf, sample, @@ -420,7 +754,6 @@ void registerInputFormatParquet(FormatFactory & factory) max_parsing_threads, min_bytes_for_seek); }); - factory.markFormatSupportsSubcolumns("Parquet"); factory.markFormatSupportsSubsetOfColumns("Parquet"); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index dc14edf2099..20ea2fb0ae6 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace parquet { class FileMetaData; } namespace parquet::arrow { class FileReader; } @@ -52,8 +53,11 @@ public: const FormatSettings & format_settings, size_t max_decoding_threads, size_t min_bytes_for_seek); + ~ParquetBlockInputFormat() override; + void setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) override; + void resetParser() override; String getName() const override { return "ParquetBlockInputFormat"; } @@ -71,14 +75,14 @@ private: } void initializeIfNeeded(); - void initializeRowGroupReader(size_t row_group_idx); + void initializeRowGroupBatchReader(size_t row_group_batch_idx); - void decodeOneChunk(size_t row_group_idx, std::unique_lock & lock); + void decodeOneChunk(size_t row_group_batch_idx, std::unique_lock & lock); - void scheduleMoreWorkIfNeeded(std::optional row_group_touched = std::nullopt); - void scheduleRowGroup(size_t row_group_idx); + void scheduleMoreWorkIfNeeded(std::optional row_group_batch_touched = std::nullopt); + void scheduleRowGroup(size_t row_group_batch_idx); - void threadFunction(size_t row_group_idx); + void threadFunction(size_t row_group_batch_idx); // Data layout in the file: // @@ -165,7 +169,7 @@ private: // * The max_pending_chunks_per_row_group limit could be based on actual memory usage too. // Useful for preserve_order. - struct RowGroupState + struct RowGroupBatchState { // Transitions: // @@ -202,8 +206,10 @@ private: size_t next_chunk_idx = 0; size_t num_pending_chunks = 0; - size_t row_group_bytes_uncompressed = 0; - size_t row_group_rows = 0; + size_t total_rows = 0; + size_t total_bytes_compressed = 0; + + std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. std::unique_ptr file_reader; @@ -217,7 +223,7 @@ private: Chunk chunk; BlockMissingValues block_missing_values; size_t chunk_idx; // within row group - size_t row_group_idx; + size_t row_group_batch_idx; size_t approx_original_chunk_size; // For priority_queue. @@ -230,8 +236,8 @@ private: bool operator()(const PendingChunk & a, const PendingChunk & b) const { auto tuplificate = [this](const PendingChunk & c) - { return row_group_first ? std::tie(c.row_group_idx, c.chunk_idx) - : std::tie(c.chunk_idx, c.row_group_idx); }; + { return row_group_first ? std::tie(c.row_group_batch_idx, c.chunk_idx) + : std::tie(c.chunk_idx, c.row_group_batch_idx); }; return tuplificate(a) > tuplificate(b); } }; @@ -241,14 +247,17 @@ private: const std::unordered_set & skip_row_groups; size_t max_decoding_threads; size_t min_bytes_for_seek; - const size_t max_pending_chunks_per_row_group = 2; + const size_t max_pending_chunks_per_row_group_batch = 2; - // RandomAccessFile is thread safe, so we share it among threads. - // FileReader is not, so each thread creates its own. + /// RandomAccessFile is thread safe, so we share it among threads. + /// FileReader is not, so each thread creates its own. std::shared_ptr arrow_file; std::shared_ptr metadata; - // indices of columns to read from Parquet file + /// Indices of columns to read from Parquet file. std::vector column_indices; + /// Pushed-down filter that we'll use to skip row groups. + std::optional key_condition; + // Window of active row groups: // @@ -264,12 +273,12 @@ private: // Wakes up the generate() call, if any. std::condition_variable condvar; - std::vector row_groups; + std::vector row_group_batches; std::priority_queue, PendingChunk::Compare> pending_chunks; - size_t row_groups_completed = 0; + size_t row_group_batches_completed = 0; // These are only used when max_decoding_threads > 1. - size_t row_groups_started = 0; + size_t row_group_batches_started = 0; std::unique_ptr pool; BlockMissingValues previous_block_missing_values; diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index cff83b0ad3b..043e6d2260c 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -504,7 +504,6 @@ void registerInputFormatParquetMetadata(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ParquetMetadata"); factory.markFormatSupportsSubsetOfColumns("ParquetMetadata"); } diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 6098923a195..4a5ce2705e7 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -14,7 +14,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) , reader(std::make_unique(in_)) @@ -22,7 +22,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( header_.getNames(), header_.getDataTypes(), missing_column_indices, - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, flatten_google_wrappers_, @@ -84,7 +84,7 @@ void registerInputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.input_flatten_google_wrappers); + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), settings.protobuf.input_flatten_google_wrappers); }); factory.markFormatSupportsSubsetOfColumns("ProtobufList"); factory.registerAdditionalInfoForSchemaCacheGetter( diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index ba2e8014878..09eabb0ee59 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -28,7 +28,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_); String getName() const override { return "ProtobufListInputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp index 815b9ebb61d..ae0b9db7357 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp @@ -2,7 +2,6 @@ #if USE_PROTOBUF # include -# include # include # include # include @@ -13,14 +12,14 @@ namespace DB ProtobufListOutputFormat::ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_) : IRowOutputFormat(header_, out_) , writer(std::make_unique(out)) , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, defaults_for_nullable_google_wrappers_, @@ -55,7 +54,7 @@ void registerOutputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings.protobuf.output_nullables_with_google_wrappers); }); } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h index d85018c0351..e7765590d51 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h @@ -4,10 +4,10 @@ #if USE_PROTOBUF # include +# include namespace DB { -class FormatSchemaInfo; class ProtobufWriter; class ProtobufSerializer; @@ -26,7 +26,7 @@ public: ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_); String getName() const override { return "ProtobufListOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 126f3673571..771a0790527 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -11,9 +11,9 @@ namespace DB { ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) - , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No)) + , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No)) , with_length_delimiter(with_length_delimiter_) , flatten_google_wrappers(flatten_google_wrappers_) { @@ -89,7 +89,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), with_length_delimiter, settings.protobuf.input_flatten_google_wrappers); }); diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 5c042f7c5ab..9d7cecd7146 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -33,7 +33,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_); diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 129c9ca3156..7b4cc1bf0be 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -3,7 +3,6 @@ #if USE_PROTOBUF # include # include -# include # include # include # include @@ -20,7 +19,7 @@ namespace ErrorCodes ProtobufRowOutputFormat::ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_) : IRowOutputFormat(header_, out_) @@ -28,7 +27,7 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat( , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No), with_length_delimiter_, /* with_envelope = */ false, settings_.protobuf.output_nullables_with_google_wrappers, @@ -61,7 +60,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings, with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index f6ff5bae999..213e1c785fd 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -4,11 +4,11 @@ #if USE_PROTOBUF # include +# include namespace DB { class DB; -class FormatSchemaInfo; class ProtobufSerializer; class ProtobufWriter; class WriteBuffer; @@ -30,7 +30,7 @@ public: ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_); diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index d902a8be6a7..8e94a568b1e 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -143,7 +143,7 @@ RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & { } -DataTypes RegexpSchemaReader::readRowAndGetDataTypes() +std::optional RegexpSchemaReader::readRowAndGetDataTypes() { if (buf.eof()) return {}; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 2469774aaf9..7417d48d8c1 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -79,7 +79,7 @@ public: RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 2239c8539e3..7fbad583ced 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -300,6 +300,11 @@ bool TabSeparatedFormatReader::checkForSuffix() return false; } +bool TabSeparatedFormatReader::checkForEndOfRow() +{ + return buf->eof() || *buf->position() == '\n'; +} + TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( @@ -315,19 +320,22 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader( { } -std::pair, DataTypes> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() +std::optional, DataTypes>> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { if (buf.eof()) return {}; auto fields = reader.readRow(); auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); - return {fields, data_types}; + return std::make_pair(fields, data_types); } -DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() +std::optional TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() { - return readRowAndGetFieldsAndDataTypes().second; + auto fields_with_types = readRowAndGetFieldsAndDataTypes(); + if (!fields_with_types) + return {}; + return std::move(fields_with_types->second); } void registerInputFormatTabSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 8df57675cf5..e0234761d61 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -76,6 +76,9 @@ public: void setReadBuffer(ReadBuffer & in_) override; bool checkForSuffix() override; + bool checkForEndOfRow() override; + + bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; } private: template @@ -92,8 +95,10 @@ public: TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypesImpl() override; - std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; } + + std::optional readRowAndGetDataTypesImpl() override; + std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() override; PeekableReadBuffer buf; TabSeparatedFormatReader reader; diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 8a09e800fa7..b065e00f5d1 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -490,7 +490,7 @@ TemplateSchemaReader::TemplateSchemaReader( setColumnNames(row_format.column_names); } -DataTypes TemplateSchemaReader::readRowAndGetDataTypes() +std::optional TemplateSchemaReader::readRowAndGetDataTypes() { if (first_row) format_reader.readPrefix(); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 8f9088e2c47..2752cb13e50 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -119,7 +119,7 @@ public: std::string row_between_delimiter, const FormatSettings & format_settings_); - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; private: void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 3a65a6fe4ea..6cb469afca1 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -638,7 +638,7 @@ ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & { } -DataTypes ValuesSchemaReader::readRowAndGetDataTypes() +std::optional ValuesSchemaReader::readRowAndGetDataTypes() { if (first_row) { diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index 8f8d44ec088..7f1dbc0da66 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -105,7 +105,7 @@ public: ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + std::optional readRowAndGetDataTypes() override; PeekableReadBuffer buf; ParserExpression parser; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index fb49779e0af..fc2b5cd8207 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -212,8 +212,24 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + size_t file_column = 0; + for (; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { + if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) + { + while (file_column < column_mapping->column_indexes_for_input_fields.size()) + { + const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; + if (rem_column_index) + columns[*rem_column_index]->insertDefault(); + ++file_column; + } + break; + } + + if (file_column != 0) + format_reader->skipFieldDelimiter(); + const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) @@ -225,22 +241,6 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE column_mapping->names_of_columns[file_column]); else format_reader->skipField(file_column); - - if (!is_last_file_column) - { - if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) - { - ++file_column; - while (file_column < column_mapping->column_indexes_for_input_fields.size()) - { - const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; - columns[*rem_column_index]->insertDefault(); - ++file_column; - } - } - else - format_reader->skipFieldDelimiter(); - } } if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow()) @@ -248,7 +248,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE do { format_reader->skipFieldDelimiter(); - format_reader->skipField(1); + format_reader->skipField(file_column++); } while (!format_reader->checkForEndOfRow()); } @@ -419,12 +419,14 @@ namespace void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & column_names, std::vector & type_names) { - auto [first_row_values, first_row_types] = readRowAndGetFieldsAndDataTypes(); + auto first_row = readRowAndGetFieldsAndDataTypes(); /// No data. - if (first_row_values.empty()) + if (!first_row) return; + const auto & [first_row_values, first_row_types] = *first_row; + /// The first row contains non String elements, it cannot be a header. if (!checkIfAllTypesAreString(first_row_types)) { @@ -432,15 +434,17 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & return; } - auto [second_row_values, second_row_types] = readRowAndGetFieldsAndDataTypes(); + auto second_row = readRowAndGetFieldsAndDataTypes(); /// Data contains only 1 row, don't treat it as a header. - if (second_row_values.empty()) + if (!second_row) { buffered_types = first_row_types; return; } + const auto & [second_row_values, second_row_types] = *second_row; + DataTypes data_types; bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values)); size_t row = 2; @@ -450,15 +454,16 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & } else { - data_types = readRowAndGetDataTypes(); + auto data_types_maybe = readRowAndGetDataTypes(); /// Data contains only 2 rows. - if (data_types.empty()) + if (!data_types_maybe) { second_row_can_be_type_names = false; data_types = second_row_types; } else { + data_types = *data_types_maybe; ++row; } } @@ -490,10 +495,10 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & return; } - auto next_row_types = readRowAndGetDataTypes(); + auto next_row_types_maybe = readRowAndGetDataTypes(); /// Check if there are no more rows in data. It means that all rows contains only String values and Nulls, /// so, the first two rows with all String elements can be real data and we cannot use them as a header. - if (next_row_types.empty()) + if (!next_row_types_maybe) { /// Buffer first data types from the first row, because it doesn't contain Nulls. buffered_types = first_row_types; @@ -502,11 +507,11 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & ++row; /// Combine types from current row and from previous rows. - chooseResultColumnTypes(*this, data_types, next_row_types, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); + chooseResultColumnTypes(*this, data_types, *next_row_types_maybe, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); } } -DataTypes FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() +std::optional FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() { /// Check if we tried to detect a header and have buffered types from read rows. if (!buffered_types.empty()) diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b5103d3db39..377341da685 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -119,9 +119,10 @@ public: /// Check suffix. virtual bool checkForSuffix() { return in->eof(); } + /// Check if we are at the end of row, not between fields. virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); } - virtual bool allowVariableNumberOfColumns() { return false; } + virtual bool allowVariableNumberOfColumns() const { return false; } const FormatSettings & getFormatSettings() const { return format_settings; } @@ -160,15 +161,15 @@ public: NamesAndTypesList readSchema() override; protected: - virtual DataTypes readRowAndGetDataTypes() override; + virtual std::optional readRowAndGetDataTypes() override; - virtual DataTypes readRowAndGetDataTypesImpl() + virtual std::optional readRowAndGetDataTypesImpl() { throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypesImpl is not implemented"}; } - /// Return column fields with inferred types. In case of no more rows, return empty vectors. - virtual std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() + /// Return column fields with inferred types. In case of no more rows, return nullopt. + virtual std::optional, DataTypes>> readRowAndGetFieldsAndDataTypes() { throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented"}; } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 00d5b2ee089..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -14,14 +13,6 @@ IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_ { } -InputPort * IAccumulatingTransform::addTotalsPort() -{ - if (inputs.size() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals port was already added to IAccumulatingTransform"); - - return &inputs.emplace_back(getInputPort().getHeader(), this); -} - IAccumulatingTransform::Status IAccumulatingTransform::prepare() { /// Check can output. diff --git a/src/Processors/IAccumulatingTransform.h b/src/Processors/IAccumulatingTransform.h index b51753199c3..67063da4e11 100644 --- a/src/Processors/IAccumulatingTransform.h +++ b/src/Processors/IAccumulatingTransform.h @@ -36,10 +36,6 @@ public: Status prepare() override; void work() override; - /// Adds additional port for totals. - /// If added, totals will have been ready by the first generate() call (in totals chunk). - InputPort * addTotalsPort(); - InputPort & getInputPort() { return input; } OutputPort & getOutputPort() { return output; } }; diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 418bf5e3f13..a3ff7a4ef87 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -163,12 +163,8 @@ static bool compareRetentions(const Retention & a, const Retention & b) { return false; } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception::createDeprecated( - error_msg, - DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Age and precision should only grow up: {}:{} vs {}:{}", + a.age, a.precision, b.age, b.precision); } bool operator==(const Retention & a, const Retention & b) diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index c54d32c1385..c5b5dad5aa5 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -141,7 +141,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi /// Add balancing transform auto idx = position == JoinTableSide::Left ? PingPongProcessor::First : PingPongProcessor::Second; auto stream_balancer = std::make_shared(input_header, num_ports, max_rows_in_set, idx); - stream_balancer->setDescription(getStepDescription()); + stream_balancer->setDescription("Reads rows from two streams evenly"); /// Regular inputs just bypass data for respective ports connectAllInputs(ports, stream_balancer->getInputs(), num_ports); @@ -163,7 +163,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi { auto & port = *output_it++; auto transform = std::make_shared(port.getHeader(), column_names, filtering_set); - transform->setDescription(this->getStepDescription()); + transform->setDescription("Filter rows using other join table side's set"); connect(port, transform->getInputPort()); result_transforms.emplace_back(std::move(transform)); } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index b251eec2d28..2ff50ca4fe3 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -75,7 +75,7 @@ std::unique_ptr createLocalPlan( new_context->setClientInterface(ClientInfo::Interface::LOCAL); new_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); new_context->setReplicaInfo(true, replica_count, replica_num); - new_context->setConnectionClientVersion(DBMS_VERSION_MAJOR, DBMS_VERSION_MINOR, DBMS_VERSION_PATCH, DBMS_TCP_PROTOCOL_VERSION); + new_context->setConnectionClientVersion(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH, DBMS_TCP_PROTOCOL_VERSION); new_context->setParallelReplicasGroupUUID(group_uuid); new_context->setMergeTreeAllRangesCallback([coordinator](InitialAllRangesAnnouncement announcement) { diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 33fa7955e0d..63a5eeb51d2 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -62,6 +65,36 @@ void JoinStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } +void JoinStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + + const auto & table_join = join->getTableJoin(); + settings.out << prefix << "Type: " << toString(table_join.kind()) << '\n'; + settings.out << prefix << "Strictness: " << toString(table_join.strictness()) << '\n'; + settings.out << prefix << "Algorithm: " << join->getName() << '\n'; + + if (table_join.strictness() == JoinStrictness::Asof) + settings.out << prefix << "ASOF inequality: " << toString(table_join.getAsofInequality()) << '\n'; + + if (!table_join.getClauses().empty()) + settings.out << prefix << "Clauses: " << table_join.formatClauses(table_join.getClauses(), true /*short_format*/) << '\n'; +} + +void JoinStep::describeActions(JSONBuilder::JSONMap & map) const +{ + const auto & table_join = join->getTableJoin(); + map.add("Type", toString(table_join.kind())); + map.add("Strictness", toString(table_join.strictness())); + map.add("Algorithm", join->getName()); + + if (table_join.strictness() == JoinStrictness::Asof) + map.add("ASOF inequality", toString(table_join.getAsofInequality())); + + if (!table_join.getClauses().empty()) + map.add("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); +} + void JoinStep::updateInputStream(const DataStream & new_input_stream_, size_t idx) { if (idx == 0) diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index e7185f36588..369ee9bec8b 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -27,6 +27,9 @@ public: void describePipeline(FormatSettings & settings) const override; + void describeActions(JSONBuilder::JSONMap & map) const override; + void describeActions(FormatSettings & settings) const override; + const JoinPtr & getJoin() const { return join; } bool allowPushDownToRight() const; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 6ecec1359c5..2230e50425c 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -16,7 +16,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them. /// After that it add CreateSetsStep for the subqueries that has not be used in the filters. -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes); +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Optimization (first pass) is a function applied to QueryPlan::Node. /// It can read and update subtree of specified node. @@ -113,7 +113,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes); +bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes); /// Enable memory bound merging of aggregation states for remote queries /// in case it was enabled for local plan diff --git a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp index e9100ae9d02..47df05301c9 100644 --- a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp +++ b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp @@ -6,7 +6,7 @@ namespace DB::QueryPlanOptimizations { -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) +bool addPlansForSets(QueryPlan & root_plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto * delayed = typeid_cast(node.step.get()); if (!delayed) @@ -23,7 +23,9 @@ bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { input_streams.push_back(plan->getCurrentDataStream()); node.children.push_back(plan->getRootNode()); - nodes.splice(nodes.end(), QueryPlan::detachNodes(std::move(*plan))); + auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan)); + nodes.splice(nodes.end(), std::move(add_nodes)); + root_plan.addResources(std::move(add_resources)); } auto creating_sets = std::make_unique(std::move(input_streams)); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index b13dda9a8f0..0caedff67a5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -181,7 +181,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1"); } -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { Stack stack; stack.push_back({.node = &root}); @@ -205,7 +205,7 @@ void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) source_step_with_filter->applyFilters(); } - addPlansForSets(*frame.node, nodes); + addPlansForSets(plan, *frame.node, nodes); stack.pop_back(); } diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 533fbde1e13..61c6422de5a 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -254,6 +254,32 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static void reorderColumns(ActionsDAG & dag, const Block & header, const std::string & filter_column) +{ + std::unordered_map inputs_map; + for (const auto * input : dag.getInputs()) + inputs_map[input->result_name] = input; + + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + if (!input) + input = &dag.addInput(col); + } + + ActionsDAG::NodeRawConstPtrs new_outputs; + new_outputs.reserve(header.columns() + 1); + + new_outputs.push_back(&dag.findInOutputs(filter_column)); + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + new_outputs.push_back(input); + } + + dag.getOutputs() = std::move(new_outputs); +} + Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, ExpressionActionsPtr sorting_expr, @@ -279,6 +305,7 @@ Pipes buildPipesForReadingByPKRanges( continue; auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes()); auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false); + reorderColumns(*actions, pipes[i].getHeader(), filter_function->getColumnName()); ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? ::toString(borders[i]) : "+inf"); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 687260441ff..ceda9f97bab 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -482,7 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); - QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes); + QueryPlanOptimizations::optimizeTreeThirdPass(*this, *root, nodes); updateDataStreams(*root); } @@ -542,9 +542,9 @@ void QueryPlan::explainEstimate(MutableColumns & columns) } } -QueryPlan::Nodes QueryPlan::detachNodes(QueryPlan && plan) +std::pair QueryPlan::detachNodesAndResources(QueryPlan && plan) { - return std::move(plan.nodes); + return {std::move(plan.nodes), std::move(plan.resources)}; } } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index d89bdc534be..f4a6c9097f2 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -108,7 +108,7 @@ public: using Nodes = std::list; Node * getRootNode() const { return root; } - static Nodes detachNodes(QueryPlan && plan); + static std::pair detachNodesAndResources(QueryPlan && plan); private: QueryPlanResourceHolder resources; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 7a99c363232..1e2f5adf2b3 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -103,7 +103,8 @@ ReadFromRemote::ReadFromRemote( Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_) + std::shared_ptr storage_limits_, + const String & cluster_name_) : ISourceStep(DataStream{.header = std::move(header_)}) , shards(std::move(shards_)) , stage(stage_) @@ -116,6 +117,7 @@ ReadFromRemote::ReadFromRemote( , storage_limits(std::move(storage_limits_)) , log(log_) , shard_count(shard_count_) + , cluster_name(cluster_name_) { } @@ -234,13 +236,37 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; + if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS) + { + if (context->getSettingsRef().cluster_for_parallel_replicas.changed) + { + const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas; + if (cluster_for_parallel_replicas != cluster_name) + LOG_INFO(log, "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is used: {}", + cluster_for_parallel_replicas, cluster_name); + } + context->setSetting("cluster_for_parallel_replicas", cluster_name); + } + std::shared_ptr remote_query_executor; remote_query_executor = std::make_shared( shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); remote_query_executor->setLogger(log); - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + + if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS) + { + // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: + // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. + // The coordinator will return query result from the shard. + // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. + // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) + // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + } + else + remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index ac869cd89f9..27e640970ce 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -35,7 +35,8 @@ public: Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_); + std::shared_ptr storage_limits_, + const String & cluster_name_); String getName() const override { return "ReadFromRemote"; } @@ -55,8 +56,9 @@ private: Tables external_tables; std::shared_ptr storage_limits; Poco::Logger * log; - UInt32 shard_count; + String cluster_name; + void addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); void addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); }; diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index 8167fae9baf..57b878f7d39 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -138,11 +137,11 @@ ResizeProcessor::Status ResizeProcessor::prepare() while (!is_end_input() && !is_end_output()) { auto output = get_next_out(); - auto input = get_next_input(); if (output == outputs.end()) return get_status_if_no_outputs(); + auto input = get_next_input(); if (input == inputs.end()) return get_status_if_no_inputs(); @@ -164,10 +163,7 @@ IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, initialized = true; for (auto & input : inputs) - { - input.setNeeded(); input_ports.push_back({.port = &input, .status = InputStatus::NotActive}); - } for (auto & output : outputs) output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); @@ -197,6 +193,13 @@ IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, } } + if (!is_reading_started && !waiting_outputs.empty()) + { + for (auto & input : inputs) + input.setNeeded(); + is_reading_started = true; + } + if (num_finished_outputs == outputs.size()) { for (auto & input : inputs) diff --git a/src/Processors/ResizeProcessor.h b/src/Processors/ResizeProcessor.h index 766c39172a2..61e35c54364 100644 --- a/src/Processors/ResizeProcessor.h +++ b/src/Processors/ResizeProcessor.h @@ -43,6 +43,7 @@ private: std::queue waiting_outputs; std::queue inputs_with_data; bool initialized = false; + bool is_reading_started = false; enum class OutputStatus { diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index ee7fd757949..a18ac802648 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -148,7 +148,9 @@ Processors DelayedSource::expandPipeline() inputs.emplace_back(outputs.front().getHeader(), this); /// Connect checks that header is same for ports. connect(*output, inputs.back()); - inputs.back().setNeeded(); + + if (output == main_output) + inputs.back().setNeeded(); } /// Executor will check that all processors are connected. diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index ace175f251c..2625a7cdabb 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include namespace DB @@ -21,10 +23,10 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; extern const int TIMEOUT_EXCEEDED; - extern const int CANNOT_FCNTL; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_POLL; extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; + extern const int CANNOT_FCNTL; + extern const int CANNOT_POLL; } static bool tryMakeFdNonBlocking(int fd) @@ -64,19 +66,14 @@ static void makeFdBlocking(int fd) throwFromErrno("Cannot set blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); } -static bool pollFd(int fd, size_t timeout_milliseconds, int events) +static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds) { - pollfd pfd; - pfd.fd = fd; - pfd.events = events; - pfd.revents = 0; - int res; while (true) { Stopwatch watch; - res = poll(&pfd, 1, static_cast(timeout_milliseconds)); + res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); if (res < 0) { @@ -94,17 +91,44 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) } } - return res > 0; + return res; +} + +static bool pollFd(int fd, size_t timeout_milliseconds, int events) +{ + pollfd pfd; + pfd.fd = fd; + pfd.events = events; + pfd.revents = 0; + + return pollWithTimeout(&pfd, 1, timeout_milliseconds) > 0; } class TimeoutReadBufferFromFileDescriptor : public BufferWithOwnMemory { public: - explicit TimeoutReadBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) - : fd(fd_) + explicit TimeoutReadBufferFromFileDescriptor( + int stdout_fd_, + int stderr_fd_, + size_t timeout_milliseconds_, + ExternalCommandStderrReaction stderr_reaction_) + : stdout_fd(stdout_fd_) + , stderr_fd(stderr_fd_) , timeout_milliseconds(timeout_milliseconds_) + , stderr_reaction(stderr_reaction_) { - makeFdNonBlocking(fd); + makeFdNonBlocking(stdout_fd); + makeFdNonBlocking(stderr_fd); + + pfds[0].fd = stdout_fd; + pfds[0].events = POLLIN; + pfds[1].fd = stderr_fd; + pfds[1].events = POLLIN; + + if (stderr_reaction == ExternalCommandStderrReaction::NONE) + num_pfds = 1; + else + num_pfds = 2; } bool nextImpl() override @@ -113,19 +137,54 @@ public: while (!bytes_read) { - if (!pollFd(fd, timeout_milliseconds, POLLIN)) + pfds[0].revents = 0; + pfds[1].revents = 0; + size_t num_events = pollWithTimeout(pfds, num_pfds, timeout_milliseconds); + if (0 == num_events) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe read timeout exceeded {} milliseconds", timeout_milliseconds); - ssize_t res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); + bool has_stdout = pfds[0].revents > 0; + bool has_stderr = pfds[1].revents > 0; - if (-1 == res && errno != EINTR) - throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + if (has_stderr) + { + if (stderr_read_buf == nullptr) + stderr_read_buf.reset(new char[BUFFER_SIZE]); + ssize_t res = ::read(stderr_fd, stderr_read_buf.get(), BUFFER_SIZE); + if (res > 0) + { + std::string_view str(stderr_read_buf.get(), res); + if (stderr_reaction == ExternalCommandStderrReaction::THROW) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable generates stderr: {}", str); + else if (stderr_reaction == ExternalCommandStderrReaction::LOG) + LOG_WARNING( + &::Poco::Logger::get("TimeoutReadBufferFromFileDescriptor"), "Executable generates stderr: {}", str); + else if (stderr_reaction == ExternalCommandStderrReaction::LOG_FIRST) + { + res = std::min(ssize_t(stderr_result_buf.reserve()), res); + if (res > 0) + stderr_result_buf.insert(stderr_result_buf.end(), str.begin(), str.begin() + res); + } + else if (stderr_reaction == ExternalCommandStderrReaction::LOG_LAST) + { + stderr_result_buf.insert(stderr_result_buf.end(), str.begin(), str.begin() + res); + } + } + } - if (res == 0) - break; + if (has_stdout) + { + ssize_t res = ::read(stdout_fd, internal_buffer.begin(), internal_buffer.size()); - if (res > 0) - bytes_read += res; + if (-1 == res && errno != EINTR) + throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + + if (res == 0) + break; + + if (res > 0) + bytes_read += res; + } } if (bytes_read > 0) @@ -143,25 +202,46 @@ public: void reset() const { - makeFdBlocking(fd); + makeFdBlocking(stdout_fd); + makeFdBlocking(stderr_fd); } ~TimeoutReadBufferFromFileDescriptor() override { - tryMakeFdBlocking(fd); + tryMakeFdBlocking(stdout_fd); + tryMakeFdBlocking(stderr_fd); + + if (!stderr_result_buf.empty()) + { + String stderr_result; + stderr_result.reserve(stderr_result_buf.size()); + stderr_result.append(stderr_result_buf.begin(), stderr_result_buf.end()); + LOG_WARNING( + &::Poco::Logger::get("ShellCommandSource"), + "Executable generates stderr at the {}: {}", + stderr_reaction == ExternalCommandStderrReaction::LOG_FIRST ? "beginning" : "end", + stderr_result); + } } private: - int fd; + int stdout_fd; + int stderr_fd; size_t timeout_milliseconds; + ExternalCommandStderrReaction stderr_reaction; + + static constexpr size_t BUFFER_SIZE = 4_KiB; + pollfd pfds[2]; + size_t num_pfds; + std::unique_ptr stderr_read_buf; + boost::circular_buffer_space_optimized stderr_result_buf{BUFFER_SIZE}; }; class TimeoutWriteBufferFromFileDescriptor : public BufferWithOwnMemory { public: explicit TimeoutWriteBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) - : fd(fd_) - , timeout_milliseconds(timeout_milliseconds_) + : fd(fd_), timeout_milliseconds(timeout_milliseconds_) { makeFdNonBlocking(fd); } @@ -248,6 +328,8 @@ namespace ContextPtr context_, const std::string & format_, size_t command_read_timeout_milliseconds, + ExternalCommandStderrReaction stderr_reaction, + bool check_exit_code_, const Block & sample_block_, std::unique_ptr && command_, std::vector && send_data_tasks = {}, @@ -260,13 +342,14 @@ namespace , sample_block(sample_block_) , command(std::move(command_)) , configuration(configuration_) - , timeout_command_out(command->out.getFD(), command_read_timeout_milliseconds) + , timeout_command_out(command->out.getFD(), command->err.getFD(), command_read_timeout_milliseconds, stderr_reaction) , command_holder(std::move(command_holder_)) , process_pool(process_pool_) + , check_exit_code(check_exit_code_) { for (auto && send_data_task : send_data_tasks) { - send_data_threads.emplace_back([task = std::move(send_data_task), this]() + send_data_threads.emplace_back([task = std::move(send_data_task), this]() mutable { try { @@ -276,6 +359,10 @@ namespace { std::lock_guard lock(send_data_lock); exception_during_send_data = std::current_exception(); + + /// task should be reset inside catch block or else it breaks d'tor + /// invariants such as in ~WriteBuffer. + task = {}; } }); } @@ -352,11 +439,7 @@ namespace } if (!executor->pull(chunk)) - { - if (configuration.check_exit_code) - command->wait(); return {}; - } current_read_rows += chunk.getNumRows(); } @@ -379,6 +462,21 @@ namespace if (thread.joinable()) thread.join(); + if (check_exit_code) + { + if (process_pool) + { + bool valid_command + = configuration.read_fixed_number_of_rows && current_read_rows >= configuration.number_of_rows_to_read; + + // We can only wait for pooled commands when they are invalid. + if (!valid_command) + command->wait(); + } + else + command->wait(); + } + rethrowExceptionDuringSendDataIfNeeded(); } @@ -413,6 +511,8 @@ namespace ShellCommandHolderPtr command_holder; std::shared_ptr process_pool; + bool check_exit_code = false; + QueryPipeline pipeline; std::unique_ptr executor; @@ -534,7 +634,8 @@ Pipe ShellCommandSourceCoordinator::createPipe( } int write_buffer_fd = write_buffer->getFD(); - auto timeout_write_buffer = std::make_shared(write_buffer_fd, configuration.command_write_timeout_milliseconds); + auto timeout_write_buffer + = std::make_shared(write_buffer_fd, configuration.command_write_timeout_milliseconds); input_pipes[i].resize(1); @@ -570,6 +671,8 @@ Pipe ShellCommandSourceCoordinator::createPipe( context, configuration.format, configuration.command_read_timeout_milliseconds, + configuration.stderr_reaction, + configuration.check_exit_code, std::move(sample_block), std::move(process), std::move(tasks), diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h index 6dc6781cc4c..44bd725bbe2 100644 --- a/src/Processors/Sources/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -4,6 +4,7 @@ #include +#include #include #include @@ -33,9 +34,6 @@ struct ShellCommandSourceConfiguration size_t number_of_rows_to_read = 0; /// Max block size size_t max_block_size = DEFAULT_BLOCK_SIZE; - /// Will throw if the command exited with - /// non-zero status code - size_t check_exit_code = false; }; class ShellCommandSourceCoordinator @@ -57,6 +55,15 @@ public: /// Timeout for writing data to command stdin size_t command_write_timeout_milliseconds = 10000; + /// Reaction when external command outputs data to its stderr. + ExternalCommandStderrReaction stderr_reaction = ExternalCommandStderrReaction::NONE; + + /// Will throw if the command exited with + /// non-zero status code. + /// NOTE: If executable pool is used, we cannot check exit code, + /// which makes this configuration no effect. + size_t check_exit_code = false; + /// Pool size valid only if executable_pool = true size_t pool_size = 16; diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 3d5a910f054..4e9f7b7601a 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -159,14 +159,14 @@ void AggregatingInOrderTransform::consume(Chunk chunk) if (group_by_key) params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw); else - params->aggregator.mergeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_columns_data); + params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data); } else { if (group_by_key) params->aggregator.executeOnBlockSmall(variants, key_begin, key_end, key_columns_raw, aggregate_function_instructions.data()); else - params->aggregator.executeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_function_instructions.data()); + params->aggregator.executeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_function_instructions.data()); } } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 646256d60c0..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp index 59c4b9a6a87..83a75318d61 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector & ind Columns columns; const Columns & all_cols = chunk.getColumns(); for (const auto & index : indices) - columns.push_back(all_cols.at(index)); + { + auto col = recursiveRemoveSparse(all_cols.at(index)); + columns.push_back(std::move(col)); + } + return columns; } @@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare() LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered", Poco::toLower(getDescription()), fmt::join(column_names, ", "), stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows, - 100 - 100.0 * stat.result_rows / stat.consumed_rows); + stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0); } else { diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp index 28a3260d742..91806a90083 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp @@ -5,6 +5,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; } @@ -126,9 +127,20 @@ bool DistinctSortedChunkTransform::isKey(const size_t key_pos, const size_t row_ bool DistinctSortedChunkTransform::isLatestKeyFromPrevChunk(const size_t row_pos) const { - for (size_t i = 0; i < sorted_columns.size(); ++i) + for (size_t i = 0, s = sorted_columns.size(); i < s; ++i) { - const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, *sorted_columns[i], sorted_columns_descr[i].nulls_direction); + const auto & sorted_column = *sorted_columns[i]; + /// temporary hardening due to suspious crashes in sqlancer tests + if (unlikely(sorted_column.size() <= row_pos)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected size of a sorted column: size {}, row_pos {}, column position {}, type {}", + sorted_column.size(), + row_pos, + i, + sorted_column.getFamilyName()); + + const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, sorted_column, sorted_columns_descr[i].nulls_direction); if (res != 0) return false; } diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index 3c40c078225..b50f66b0240 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Processors/Transforms/ExtractColumnsTransform.cpp b/src/Processors/Transforms/ExtractColumnsTransform.cpp new file mode 100644 index 00000000000..44bf5582290 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB +{ + +ExtractColumnsTransform::ExtractColumnsTransform(const Block & header_, const NamesAndTypesList & requested_columns_) + : ISimpleTransform(header_, transformHeader(header_, requested_columns_), false), requested_columns(requested_columns_) +{ + +} + +Block ExtractColumnsTransform::transformHeader(Block header, const NamesAndTypesList & requested_columns_) +{ + ColumnsWithTypeAndName columns; + columns.reserve(requested_columns_.size()); + for (const auto & required_column : requested_columns_) + columns.emplace_back(getColumnFromBlock(header, required_column), required_column.type, required_column.name); + + return Block(std::move(columns)); +} + +void ExtractColumnsTransform::transform(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + Columns columns; + columns.reserve(requested_columns.size()); + for (const auto & required_column : requested_columns) + columns.emplace_back(getColumnFromBlock(block, required_column)); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/ExtractColumnsTransform.h b/src/Processors/Transforms/ExtractColumnsTransform.h new file mode 100644 index 00000000000..f8b3d803736 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + +/// Extracts required columns and subcolumns from the block. +class ExtractColumnsTransform final : public ISimpleTransform +{ +public: + ExtractColumnsTransform( + const Block & header_, + const NamesAndTypesList & requested_columns_); + + String getName() const override { return "ExtractColumnsTransform"; } + + static Block transformHeader(Block header, const NamesAndTypesList & requested_columns_); + +protected: + void transform(Chunk & chunk) override; + +private: + const NamesAndTypesList requested_columns; +}; + +} diff --git a/src/Processors/Transforms/FilterSortedStreamByRange.h b/src/Processors/Transforms/FilterSortedStreamByRange.h index e1141ebd299..e3d3f6f10ef 100644 --- a/src/Processors/Transforms/FilterSortedStreamByRange.h +++ b/src/Processors/Transforms/FilterSortedStreamByRange.h @@ -27,6 +27,9 @@ public: true) , filter_transform(header_, expression_, filter_column_name_, remove_filter_column_, on_totals_) { + assertBlocksHaveEqualStructure( + header_, getOutputPort().getHeader(), + "Expression for FilterSortedStreamByRange should not change header"); } String getName() const override { return "FilterSortedStreamByRange"; } diff --git a/src/Processors/Transforms/LimitByTransform.cpp b/src/Processors/Transforms/LimitByTransform.cpp index cb2804007bd..5e6d7dc816a 100644 --- a/src/Processors/Transforms/LimitByTransform.cpp +++ b/src/Processors/Transforms/LimitByTransform.cpp @@ -33,14 +33,11 @@ void LimitByTransform::transform(Chunk & chunk) for (UInt64 row = 0; row < num_rows; ++row) { - UInt128 key{}; SipHash hash; - for (auto position : key_positions) columns[position]->updateHashWithValue(row, hash); - hash.get128(key); - + const auto key = hash.get128(); auto count = keys_counts[key]++; if (count >= group_offset && (group_length > std::numeric_limits::max() - group_offset || count < group_length + group_offset)) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 33c2d870b76..3fc9a4e71db 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -159,7 +159,7 @@ void PartialSortingTransform::transform(Chunk & chunk) { MutableColumnPtr sort_description_threshold_column_updated = raw_block_columns[i]->cloneEmpty(); sort_description_threshold_column_updated->insertFrom(*raw_block_columns[i], min_row_to_compare); - sort_description_threshold_columns_updated[i] = std::move(sort_description_threshold_column_updated); + sort_description_threshold_columns_updated[i] = sort_description_threshold_column_updated->convertToFullColumnIfSparse(); } sort_description_threshold_columns = std::move(sort_description_threshold_columns_updated); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index e89aec31655..7de9538e435 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a785d52bf65..be76971ddcd 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -148,7 +148,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, const auto * reference_column = assert_cast( _reference_column); const auto offset = _offset.get(); - assert(offset >= 0); + chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 1b20778877d..f70ebcf27b8 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -281,7 +281,7 @@ Chain buildPushingToViewsChain( /// and switch back to the original thread_status. auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - + current_thread = nullptr; std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); /// Copy of a ThreadStatus should be internal. view_thread_status_ptr->setInternalThread(); diff --git a/src/Processors/examples/CMakeLists.txt b/src/Processors/examples/CMakeLists.txt index 5d43a0d7d08..0c8734aee3c 100644 --- a/src/Processors/examples/CMakeLists.txt +++ b/src/Processors/examples/CMakeLists.txt @@ -2,3 +2,9 @@ if (TARGET ch_contrib::hivemetastore) clickhouse_add_executable (comma_separated_streams comma_separated_streams.cpp) target_link_libraries (comma_separated_streams PRIVATE dbms) endif() + +if (USE_ORC) + clickhouse_add_executable (native_orc native_orc.cpp) + target_link_libraries (native_orc PRIVATE dbms) + target_include_directories (native_orc PRIVATE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include) +endif () diff --git a/src/Processors/examples/native_orc.cpp b/src/Processors/examples/native_orc.cpp new file mode 100644 index 00000000000..201e87b1f56 --- /dev/null +++ b/src/Processors/examples/native_orc.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +using namespace DB; + +int main() +{ + /// Read schema from orc file + String path = "/path/to/orc/file"; + // String path = "/data1/clickhouse_official/data/user_files/bigolive_audience_stats_orc.orc"; + { + ReadBufferFromFile in(path); + NativeORCSchemaReader schema_reader(in, {}); + auto schema = schema_reader.readSchema(); + std::cout << "schema:" << schema.toString() << std::endl; + } + + /// Read schema from string with orc data + { + ReadBufferFromFile in(path); + + String content; + WriteBufferFromString out(content); + + copyData(in, out); + + content.resize(out.count()); + ReadBufferFromString in2(content); + NativeORCSchemaReader schema_reader(in2, {}); + auto schema = schema_reader.readSchema(); + std::cout << "schema:" << schema.toString() << std::endl; + } + return 0; +} diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 553b18dd57b..39d51beaa9d 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -579,6 +579,7 @@ void QueryPipelineBuilder::addCreatingSetsTransform( const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache) { + dropTotalsAndExtremes(); resize(1); auto transform = std::make_shared( @@ -589,12 +590,7 @@ void QueryPipelineBuilder::addCreatingSetsTransform( limits, std::move(prepared_sets_cache)); - InputPort * totals_port = nullptr; - - if (pipe.getTotalsPort()) - totals_port = transform->addTotalsPort(); - - pipe.addTransform(std::move(transform), totals_port, nullptr); + pipe.addTransform(std::move(transform)); } void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline) diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 06d4a8cef87..5e211bf036d 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -76,7 +76,7 @@ bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) epoll_event events[3]; events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; - size_t num_events = epoll.getManyReady(3, events, blocking); + size_t num_events = epoll.getManyReady(3, events, blocking ? -1 : 0); bool is_socket_ready = false; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 67d30012b0e..3370a8c009b 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -833,7 +833,7 @@ namespace { settings_changes.push_back({key, value}); } - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); query_context->applySettingsChanges(settings_changes); query_context->setCurrentQueryId(query_info.query_id()); @@ -1118,7 +1118,7 @@ namespace SettingsChanges settings_changes; for (const auto & [key, value] : external_table.settings()) settings_changes.push_back({key, value}); - external_table_context->checkSettingsConstraints(settings_changes); + external_table_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index f5b7a70dc79..236a56e2323 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -5,9 +5,9 @@ #include #include -#include #include + namespace DB { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a0bfcd49dfd..10d5e7a0242 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -561,8 +561,7 @@ void HTTPHandler::processQuery( session->makeSessionContext(); } - auto client_info = session->getClientInfo(); - auto context = session->makeQueryContext(std::move(client_info)); + auto context = session->makeQueryContext(); /// This parameter is used to tune the behavior of output formats (such as Native) for compatibility. if (params.has("client_protocol_version")) @@ -764,7 +763,7 @@ void HTTPHandler::processQuery( context->setDefaultFormat(default_format); /// For external data we also want settings - context->checkSettingsConstraints(settings_changes); + context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 78e374ee9e0..1c911034da1 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -132,21 +132,25 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS auto ping_handler = std::make_shared>(server, ping_response_expression); ping_handler->attachStrictPath("/ping"); ping_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/ping"); factory.addHandler(ping_handler); auto replicas_status_handler = std::make_shared>(server); replicas_status_handler->attachNonStrictPath("/replicas_status"); replicas_status_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/replicas_status"); factory.addHandler(replicas_status_handler); auto play_handler = std::make_shared>(server); play_handler->attachNonStrictPath("/play"); play_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/play"); factory.addHandler(play_handler); auto dashboard_handler = std::make_shared>(server); dashboard_handler->attachNonStrictPath("/dashboard"); dashboard_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); auto js_handler = std::make_shared>(server); diff --git a/src/Server/HTTPPathHints.cpp b/src/Server/HTTPPathHints.cpp new file mode 100644 index 00000000000..51ef3eabffe --- /dev/null +++ b/src/Server/HTTPPathHints.cpp @@ -0,0 +1,16 @@ +#include + +namespace DB +{ + +void HTTPPathHints::add(const String & http_path) +{ + http_paths.push_back(http_path); +} + +std::vector HTTPPathHints::getAllRegisteredNames() const +{ + return http_paths; +} + +} diff --git a/src/Server/HTTPPathHints.h b/src/Server/HTTPPathHints.h new file mode 100644 index 00000000000..708816ebf07 --- /dev/null +++ b/src/Server/HTTPPathHints.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class HTTPPathHints : public IHints<1, HTTPPathHints> +{ +public: + std::vector getAllRegisteredNames() const override; + void add(const String & http_path); + +private: + std::vector http_paths; +}; + +using HTTPPathHintsPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 61a2909d30f..5481bcd5083 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -29,7 +29,7 @@ std::unique_ptr HTTPRequestHandlerFactoryMain::createRequest || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { - return std::unique_ptr(new NotFoundHandler); + return std::unique_ptr(new NotFoundHandler(hints.getHints(request.getURI()))); } return nullptr; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index b0e57bd6b3b..07b278d831c 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -15,11 +16,14 @@ public: void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + void addPathToHints(const std::string & http_path) { hints.add(http_path); } + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; std::string name; + HTTPPathHints hints; std::vector child_factories; }; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index e3edc281e83..58d227a5ae5 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -175,10 +175,13 @@ struct SocketInterruptablePollWrapper } while (rc < 0 && errno == POCO_EINTR); - if (rc >= 1 && poll_buf[0].revents & POLLIN) - socket_ready = true; - if (rc >= 2 && poll_buf[1].revents & POLLIN) - fd_ready = true; + if (rc >= 1) + { + if (poll_buf[0].revents & POLLIN) + socket_ready = true; + if (poll_buf[1].revents & POLLIN) + fd_ready = true; + } #endif } diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 3181708b9b7..5b1db508551 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -10,7 +10,8 @@ void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerRespo try { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); - *response.send() << "There is no handle " << request.getURI() << "\n\n" + *response.send() << "There is no handle " << request.getURI() + << (!hints.empty() ? fmt::format(". Maybe you meant {}.", hints.front()) : "") << "\n\n" << "Use / or /ping for health checks.\n" << "Or /replicas_status for more sophisticated health checks.\n\n" << "Send queries from your program with POST method or GET /?query=...\n\n" diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 749ac388c4d..1cbfcd57f8f 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -9,7 +9,10 @@ namespace DB class NotFoundHandler : public HTTPRequestHandler { public: + NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; +private: + std::vector hints; }; } diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp index 4952cd1bd24..fb052e7d6e6 100644 --- a/src/Server/ServerType.cpp +++ b/src/Server/ServerType.cpp @@ -2,10 +2,10 @@ #include #include -#include #include + namespace DB { @@ -42,12 +42,9 @@ const char * ServerType::serverTypeToString(ServerType::Type type) bool ServerType::shouldStart(Type server_type, const std::string & server_custom_name) const { - if (type == Type::QUERIES_ALL) - return true; - - if (type == Type::QUERIES_DEFAULT) + auto is_type_default = [](Type current_type) { - switch (server_type) + switch (current_type) { case Type::TCP: case Type::TCP_WITH_PROXY: @@ -64,21 +61,37 @@ bool ServerType::shouldStart(Type server_type, const std::string & server_custom default: return false; } + }; + + if (exclude_types.contains(Type::QUERIES_ALL)) + return false; + + if (exclude_types.contains(Type::QUERIES_DEFAULT) && is_type_default(server_type)) + return false; + + if (exclude_types.contains(Type::QUERIES_CUSTOM) && server_type == Type::CUSTOM) + return false; + + if (exclude_types.contains(server_type)) + { + if (server_type != Type::CUSTOM) + return false; + + if (exclude_custom_names.contains(server_custom_name)) + return false; } + if (type == Type::QUERIES_ALL) + return true; + + if (type == Type::QUERIES_DEFAULT) + return is_type_default(server_type); + if (type == Type::QUERIES_CUSTOM) - { - switch (server_type) - { - case Type::CUSTOM: - return true; - default: - return false; - } - } + return server_type == Type::CUSTOM; if (type == Type::CUSTOM) - return server_type == type && server_custom_name == "protocols." + custom_name + ".port"; + return server_type == type && server_custom_name == custom_name; return server_type == type; } @@ -86,6 +99,7 @@ bool ServerType::shouldStart(Type server_type, const std::string & server_custom bool ServerType::shouldStop(const std::string & port_name) const { Type port_type; + std::string port_custom_name; if (port_name == "http_port") port_type = Type::HTTP; @@ -121,12 +135,19 @@ bool ServerType::shouldStop(const std::string & port_name) const port_type = Type::INTERSERVER_HTTPS; else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) + { port_type = Type::CUSTOM; + constexpr size_t protocols_size = std::string_view("protocols.").size(); + constexpr size_t ports_size = std::string_view(".ports").size(); + + port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - ports_size + 1); + } + else return false; - return shouldStart(port_type, port_name); + return shouldStart(port_type, port_custom_name); } } diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h index 1fab492222a..e3544fe6a28 100644 --- a/src/Server/ServerType.h +++ b/src/Server/ServerType.h @@ -1,13 +1,14 @@ #pragma once #include +#include + namespace DB { class ServerType { public: - enum Type { TCP, @@ -28,8 +29,20 @@ public: END }; + using Types = std::unordered_set; + using CustomNames = std::unordered_set; + ServerType() = default; - explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {} + + explicit ServerType( + Type type_, + const std::string & custom_name_ = "", + const Types & exclude_types_ = {}, + const CustomNames exclude_custom_names_ = {}) + : type(type_), + custom_name(custom_name_), + exclude_types(exclude_types_), + exclude_custom_names(exclude_custom_names_) {} static const char * serverTypeToString(Type type); @@ -39,6 +52,9 @@ public: Type type; std::string custom_name; + + Types exclude_types; + CustomNames exclude_custom_names; }; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a747f06f1ce..ac3928b4abe 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -184,14 +184,17 @@ void TCPHandler::runImpl() try { receiveHello(); + + /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) + session->makeSessionContext(); + sendHello(); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) { - session->makeSessionContext(); - /// If session created, then settings in session context has been updated. /// So it's better to update the connection settings for flexibility. extractConnectionSettingsFromContext(session->sessionContext()); @@ -260,6 +263,17 @@ void TCPHandler::runImpl() std::unique_ptr exception; bool network_error = false; bool query_duration_already_logged = false; + auto log_query_duration = [this, &query_duration_already_logged]() + { + if (query_duration_already_logged) + return; + query_duration_already_logged = true; + auto elapsed_sec = state.watch.elapsedSeconds(); + /// We already logged more detailed info if we read some rows + if (elapsed_sec < 1.0 && state.progress.read_rows) + return; + LOG_DEBUG(log, "Processed in {} sec.", elapsed_sec); + }; try { @@ -489,9 +503,7 @@ void TCPHandler::runImpl() /// Do it before sending end of stream, to have a chance to show log message in client. query_scope->logPeakMemoryUsage(); - - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - query_duration_already_logged = true; + log_query_duration(); if (state.is_connection_closed) break; @@ -613,10 +625,7 @@ void TCPHandler::runImpl() LOG_WARNING(log, "Can't skip data packets after query failure."); } - if (!query_duration_already_logged) - { - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - } + log_query_duration(); /// QueryState should be cleared before QueryScope, since otherwise /// the MemoryTracker will be wrong for possible deallocations. @@ -1181,7 +1190,6 @@ std::unique_ptr TCPHandler::makeSession() res->setClientName(client_name); res->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); res->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); - res->setQuotaClientKey(quota_key); res->setClientInterface(interface); return res; @@ -1274,11 +1282,10 @@ void TCPHandler::receiveHello() void TCPHandler::receiveAddendum() { if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) - { readStringBinary(quota_key, *in); - if (!is_interserver_mode) - session->setQuotaClientKey(quota_key); - } + + if (!is_interserver_mode) + session->setQuotaClientKey(quota_key); } @@ -1302,16 +1309,16 @@ void TCPHandler::receiveUnexpectedHello() void TCPHandler::sendHello() { writeVarUInt(Protocol::Server::Hello, *out); - writeStringBinary(DBMS_NAME, *out); - writeVarUInt(DBMS_VERSION_MAJOR, *out); - writeVarUInt(DBMS_VERSION_MINOR, *out); + writeStringBinary(VERSION_NAME, *out); + writeVarUInt(VERSION_MAJOR, *out); + writeVarUInt(VERSION_MINOR, *out); writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) writeStringBinary(DateLUT::instance().getTimeZone(), *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) - writeVarUInt(DBMS_VERSION_PATCH, *out); + writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); @@ -1591,12 +1598,12 @@ void TCPHandler::receiveQuery() if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { /// Throw an exception if the passed settings violate the constraints. - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); } else { /// Quietly clamp to the constraints if it's not an initial query. - query_context->clampToSettingsConstraints(settings_changes); + query_context->clampToSettingsConstraints(settings_changes, SettingSource::QUERY); } query_context->applySettingsChanges(settings_changes); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index a9247f9b898..96dcd74f947 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1077,9 +1077,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column. Cannot find column {} to modify", backQuote(column_name)); - all_columns.appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Wrong column. Cannot find column {} to modify{}", + backQuote(column_name), all_columns.getHintsMessage(column_name)); } else continue; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 045afd7e6e6..0c918bda5fd 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -232,9 +232,7 @@ void ColumnsDescription::remove(const String & column_name) auto range = getNameRange(columns, column_name); if (range.first == range.second) { - String exception_message = fmt::format("There is no column {} in table", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table{}", column_name, getHintsMessage(column_name)); } for (auto list_it = range.first; list_it != range.second;) @@ -249,9 +247,8 @@ void ColumnsDescription::rename(const String & column_from, const String & colum auto it = columns.get<1>().find(column_from); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_from); - appendHintsMessage(exception_message, column_from); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_from, getHintsMessage(column_from)); } columns.get<1>().modify_key(it, [&column_to] (String & old_name) diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 365a999673e..fb1eeed3127 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -160,9 +160,8 @@ public: auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_name, getHintsMessage(column_name)); } removeSubcolumns(it->name); diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 309aa54909a..1172a40627d 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -281,7 +281,6 @@ struct DeltaLakeMetadataParser::Impl ArrowColumnToCHColumn column_reader( header, "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, /* null_as_default */true, /* case_insensitive_column_matching */false); diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index f62c440bc2f..ac472c190e4 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -57,8 +57,8 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( { outcome = client->ListObjectsV2(request); if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, + throw S3Exception( + outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", quoteString(bucket), quoteString(base_configuration.url.key), diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d8d9a0c9d1e..51839ad973a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -419,7 +419,7 @@ catch (...) throw; } -void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path) { OpenTelemetry::TracingContextHolderPtr thread_trace_context; @@ -459,7 +459,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ if (isDistributedSendBroken(e.code(), e.isRemoteException())) { markAsBroken(file_path); - current_file.clear(); + file_path.clear(); } throw; } @@ -473,8 +473,8 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ auto dir_sync_guard = getDirectorySyncGuard(relative_path); markAsSend(file_path); - current_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); + file_path.clear(); } struct DistributedAsyncInsertDirectoryQueue::BatchHeader diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 9a8a235e265..45c355bb64e 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -100,7 +100,7 @@ private: void addFile(const std::string & file_path); void initializeFilesFromDisk(); void processFiles(); - void processFile(const std::string & file_path); + void processFile(std::string & file_path); void processFilesWithBatching(); void markAsBroken(const std::string & file_path); diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index 86b77707dee..85e64cabfd7 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -26,6 +26,7 @@ class ASTStorage; M(UInt64, monitor_split_batch_on_failure, 0, "Default - distributed_directory_monitor_split_batch_on_failure", 0) \ M(Milliseconds, monitor_sleep_time_ms, 0, "Default - distributed_directory_monitor_sleep_time_ms", 0) \ M(Milliseconds, monitor_max_sleep_time_ms, 0, "Default - distributed_directory_monitor_max_sleep_time_ms", 0) \ + M(Bool, flush_on_detach, true, "Flush data to remote nodes on DETACH/DROP/server shutdown", 0) \ DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) diff --git a/src/Storages/ExecutableSettings.h b/src/Storages/ExecutableSettings.h index 9374dac461e..10dbae8ac9f 100644 --- a/src/Storages/ExecutableSettings.h +++ b/src/Storages/ExecutableSettings.h @@ -14,7 +14,9 @@ class ASTStorage; M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ M(UInt64, command_read_timeout, 10000, "Timeout for reading data from command stdout in milliseconds.", 0) \ - M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) + M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) \ + M(ExternalCommandStderrReaction, stderr_reaction, ExternalCommandStderrReaction::NONE, "Reaction when external command outputs data to its stderr.", 0) \ + M(Bool, check_exit_code, false, "Throw exception if the command exited with non-zero status code.", 0) \ DECLARE_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 1f6b9ff7882..9455afe7833 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -42,8 +42,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory file_size; + off_t file_size; explicit ReadBufferFromHDFSImpl( const std::string & hdfs_uri_, @@ -59,7 +58,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); + hdfsFreeFileInfo(file_info, 1); + } } ~ReadBufferFromHDFSImpl() override @@ -75,16 +89,9 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); - return *file_size; + return file_size; } bool nextImpl() override @@ -104,6 +111,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index f176ac6f037..e593cb63390 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -114,9 +115,9 @@ namespace { if (next_slash_after_glob_pos == std::string::npos) { - result.emplace_back( + result.emplace_back(StorageHDFS::PathWithInfo{ String(ls.file_info[i].mName), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else { @@ -461,30 +462,23 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() return pimpl->next(); } -Block HDFSSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - HDFSSource::HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_), false) + const SelectQueryInfo & query_info_) + : ISource(info.source_header, false) , WithContext(context_) , storage(std::move(storage_)) - , block_for_format(block_for_format_) - , requested_virtual_columns(requested_virtual_columns_) + , block_for_format(info.format_header) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , max_block_size(max_block_size_) , file_iterator(file_iterator_) - , columns_description(std::move(columns_description_)) + , columns_description(info.columns_description) + , query_info(query_info_) { initialize(); } @@ -523,6 +517,7 @@ bool HDFSSource::initialize() current_path = path_with_info.path; input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); + input_format->setQueryInfo(query_info, getContext()); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -533,6 +528,14 @@ bool HDFSSource::initialize() return std::make_shared(header, columns_description, *input_format, getContext()); }); } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); return true; @@ -721,13 +724,13 @@ private: bool StorageHDFS::supportsSubsetOfColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe StorageHDFS::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -759,50 +762,18 @@ Pipe StorageHDFS::read( }); } - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, - block_for_format, - requested_virtual_columns, context_, max_block_size, iterator_wrapper, - columns_description)); + query_info)); } return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b248a37a83d..477749b1f1e 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include namespace DB @@ -76,6 +78,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, @@ -142,16 +146,13 @@ public: using IteratorWrapper = std::function; using StorageHDFSPtr = std::shared_ptr; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - ColumnsDescription columns_description_); + const SelectQueryInfo & query_info_); String getName() const override; @@ -160,10 +161,12 @@ public: private: StorageHDFSPtr storage; Block block_for_format; - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; UInt64 max_block_size; std::shared_ptr file_iterator; ColumnsDescription columns_description; + SelectQueryInfo query_info; std::unique_ptr read_buf; std::shared_ptr input_format; diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 350051ab089..1dbf2f3c4e2 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,6 +37,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 00c942fd56b..28fa010b6d2 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -122,6 +122,7 @@ public: String compression_method_, Block sample_block_, ContextPtr context_, + const SelectQueryInfo & query_info_, UInt64 max_block_size_, const StorageHive & storage_, const Names & text_input_field_names_ = {}) @@ -138,6 +139,7 @@ public: , text_input_field_names(text_input_field_names_) , format_settings(getFormatSettings(getContext())) , read_settings(getContext()->getReadSettings()) + , query_info(query_info_) { to_read_block = sample_block; @@ -278,6 +280,7 @@ public: auto input_format = FormatFactory::instance().getInput( format, *read_buf, to_read_block, getContext(), max_block_size, updateFormatSettings(current_file), /* max_parsing_threads */ 1); + input_format->setQueryInfo(query_info, getContext()); Pipe pipe(input_format); if (columns_description.hasDefaults()) @@ -392,6 +395,7 @@ private: const Names & text_input_field_names; FormatSettings format_settings; ReadSettings read_settings; + SelectQueryInfo query_info; HiveFilePtr current_file; String current_path; @@ -831,6 +835,7 @@ Pipe StorageHive::read( compression_method, sample_block, context_, + query_info, max_block_size, *this, text_input_field_names)); diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index b2e6129c61c..9e558940012 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -61,6 +61,7 @@ KafkaConsumer::KafkaConsumer( , stopped(stopped_) , current(messages.begin()) , topics(_topics) + , exceptions_buffer(EXCEPTIONS_DEPTH) { // called (synchronously, during poll) when we enter the consumer group consumer->set_assignment_callback([this](const cppkafka::TopicPartitionList & topic_partitions) @@ -79,6 +80,7 @@ KafkaConsumer::KafkaConsumer( } assignment = topic_partitions; + num_rebalance_assignments++; }); // called (synchronously, during poll) when we leave the consumer group @@ -106,6 +108,8 @@ KafkaConsumer::KafkaConsumer( cleanUnprocessed(); stalled_status = REBALANCE_HAPPENED; + last_rebalance_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + assignment.reset(); waited_for_assignment = 0; @@ -118,12 +122,14 @@ KafkaConsumer::KafkaConsumer( // { // LOG_WARNING(log, "Commit error: {}", e.what()); // } + num_rebalance_revocations++; }); consumer->set_rebalance_error_callback([this](cppkafka::Error err) { LOG_ERROR(log, "Rebalance error: {}", err); ProfileEvents::increment(ProfileEvents::KafkaRebalanceErrors); + setExceptionInfo(err); }); } @@ -177,6 +183,7 @@ void KafkaConsumer::drain() else { LOG_ERROR(log, "Error during draining: {}", error); + setExceptionInfo(error); } } @@ -251,6 +258,8 @@ void KafkaConsumer::commit() consumer->commit(); committed = true; print_offsets("Committed offset", consumer->get_offsets_committed(consumer->get_assignment())); + last_commit_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + num_commits += 1; } catch (const cppkafka::HandleException & e) { @@ -259,7 +268,10 @@ void KafkaConsumer::commit() if (e.get_error() == RD_KAFKA_RESP_ERR__NO_OFFSET) committed = true; else + { LOG_ERROR(log, "Exception during commit attempt: {}", e.what()); + setExceptionInfo(e.what()); + } } --max_retries; } @@ -399,6 +411,8 @@ ReadBufferPtr KafkaConsumer::consume() /// Don't drop old messages immediately, since we may need them for virtual columns. auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(actual_poll_timeout_ms)); + last_poll_timestamp_usec = static_cast(Poco::Timestamp().epochTime()); + num_messages_read += new_messages.size(); resetIfStopped(); if (stalled_status == CONSUMER_STOPPED) @@ -495,6 +509,7 @@ size_t KafkaConsumer::filterMessageErrors() { ProfileEvents::increment(ProfileEvents::KafkaConsumerErrors); LOG_ERROR(log, "Consumer error: {}", error); + setExceptionInfo(error); return true; } return false; @@ -527,4 +542,64 @@ void KafkaConsumer::storeLastReadMessageOffset() } } +void KafkaConsumer::setExceptionInfo(const cppkafka::Error & err) +{ + setExceptionInfo(err.to_string()); +} + +void KafkaConsumer::setExceptionInfo(const String & text) +{ + std::lock_guard lock(exception_mutex); + exceptions_buffer.push_back({text, static_cast(Poco::Timestamp().epochTime())}); +} + +/* + * Needed until + * https://github.com/mfontanini/cppkafka/pull/309 + * is merged, + * because consumer->get_member_id() contains a leak + */ +std::string KafkaConsumer::getMemberId() const +{ + char * memberid_ptr = rd_kafka_memberid(consumer->get_handle()); + std::string memberid_string = memberid_ptr; + rd_kafka_mem_free(nullptr, memberid_ptr); + return memberid_string; +} + + +KafkaConsumer::Stat KafkaConsumer::getStat() const +{ + KafkaConsumer::Stat::Assignments assignments; + auto cpp_assignments = consumer->get_assignment(); + auto cpp_offsets = consumer->get_offsets_position(cpp_assignments); + + for (size_t num = 0; num < cpp_assignments.size(); ++num) + { + assignments.push_back({ + cpp_assignments[num].get_topic(), + cpp_assignments[num].get_partition(), + cpp_offsets[num].get_offset(), + }); + } + + return { + .consumer_id = getMemberId() /* consumer->get_member_id() */ , + .assignments = std::move(assignments), + .last_poll_time = last_poll_timestamp_usec.load(), + .num_messages_read = num_messages_read.load(), + + .last_commit_timestamp_usec = last_commit_timestamp_usec.load(), + .last_rebalance_timestamp_usec = last_rebalance_timestamp_usec.load(), + .num_commits = num_commits.load(), + .num_rebalance_assignments = num_rebalance_assignments.load(), + .num_rebalance_revocations = num_rebalance_revocations.load(), + .exceptions_buffer = [&](){std::lock_guard lock(exception_mutex); + return exceptions_buffer;}(), + .in_use = in_use.load(), + .rdkafka_stat = [&](){std::lock_guard lock(rdkafka_stat_mutex); + return rdkafka_stat;}(), + }; +} + } diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index feda51a682e..91bb2ae8d77 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -20,10 +22,44 @@ namespace Poco namespace DB { +class StorageSystemKafkaConsumers; + using ConsumerPtr = std::shared_ptr; class KafkaConsumer { +public: + struct ExceptionInfo + { + String text; + UInt64 timestamp_usec; + }; + using ExceptionsBuffer = boost::circular_buffer; + + struct Stat // system.kafka_consumers data + { + struct Assignment + { + String topic_str; + Int32 partition_id; + Int64 current_offset; + }; + using Assignments = std::vector; + + String consumer_id; + Assignments assignments; + UInt64 last_poll_time; + UInt64 num_messages_read; + UInt64 last_commit_timestamp_usec; + UInt64 last_rebalance_timestamp_usec; + UInt64 num_commits; + UInt64 num_rebalance_assignments; + UInt64 num_rebalance_revocations; + KafkaConsumer::ExceptionsBuffer exceptions_buffer; + bool in_use; + std::string rdkafka_stat; + }; + public: KafkaConsumer( ConsumerPtr consumer_, @@ -69,6 +105,18 @@ public: auto currentTimestamp() const { return current[-1].get_timestamp(); } const auto & currentHeaderList() const { return current[-1].get_header_list(); } String currentPayload() const { return current[-1].get_payload(); } + void setExceptionInfo(const cppkafka::Error & err); + void setExceptionInfo(const String & text); + void setRDKafkaStat(const std::string & stat_json_string) + { + std::lock_guard lock(rdkafka_stat_mutex); + rdkafka_stat = stat_json_string; + } + void inUse() { in_use = true; } + void notInUse() { in_use = false; } + + // For system.kafka_consumers + Stat getStat() const; private: using Messages = std::vector; @@ -105,12 +153,33 @@ private: std::optional assignment; const Names topics; + /// system.kafka_consumers data is retrieved asynchronously + /// so we have to protect exceptions_buffer + mutable std::mutex exception_mutex; + const size_t EXCEPTIONS_DEPTH = 10; + ExceptionsBuffer exceptions_buffer; + + std::atomic last_exception_timestamp_usec = 0; + std::atomic last_poll_timestamp_usec = 0; + std::atomic num_messages_read = 0; + std::atomic last_commit_timestamp_usec = 0; + std::atomic num_commits = 0; + std::atomic last_rebalance_timestamp_usec = 0; + std::atomic num_rebalance_assignments = 0; + std::atomic num_rebalance_revocations = 0; + std::atomic in_use = 0; + + mutable std::mutex rdkafka_stat_mutex; + std::string rdkafka_stat; + void drain(); void cleanUnprocessed(); void resetIfStopped(); /// Return number of messages with an error. size_t filterMessageErrors(); ReadBufferPtr getNextMessage(); + + std::string getMemberId() const; }; } diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index ba242417058..cd83a6a1422 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -133,6 +133,7 @@ Chunk KafkaSource::generateImpl() { e.addMessage("while parsing Kafka message (topic: {}, partition: {}, offset: {})'", consumer->currentTopic(), consumer->currentPartition(), consumer->currentOffset()); + consumer->setExceptionInfo(e.message()); throw std::move(e); } }; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 2aba76c1a3f..54db0f29cb8 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -250,15 +250,16 @@ StorageKafka::StorageKafka( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , kafka_settings(std::move(kafka_settings_)) - , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value))) - , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value)) - , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value)) + , macros_info{.table_id = table_id_} + , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) + , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) + , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) , client_id( kafka_settings->kafka_client_id.value.empty() ? getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value)) + : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) - , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) + , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) @@ -415,7 +416,9 @@ void StorageKafka::startup() { try { - pushConsumer(createConsumer(i)); + auto consumer = createConsumer(i); + pushConsumer(consumer); + all_consumers.push_back(consumer); ++num_created_consumers; } catch (const cppkafka::Exception &) @@ -455,6 +458,7 @@ void StorageKafka::shutdown() void StorageKafka::pushConsumer(KafkaConsumerPtr consumer) { std::lock_guard lock(mutex); + consumer->notInUse(); consumers.push_back(consumer); semaphore.set(); CurrentMetrics::sub(CurrentMetrics::KafkaConsumersInUse, 1); @@ -483,6 +487,7 @@ KafkaConsumerPtr StorageKafka::popConsumer(std::chrono::milliseconds timeout) auto consumer = consumers.back(); consumers.pop_back(); CurrentMetrics::add(CurrentMetrics::KafkaConsumersInUse, 1); + consumer->inUse(); return consumer; } @@ -511,7 +516,11 @@ KafkaConsumerPtr StorageKafka::createConsumer(size_t consumer_number) size_t default_queued_min_messages = 100000; // we don't want to decrease the default conf.set("queued.min.messages", std::max(getMaxBlockSize(),default_queued_min_messages)); - updateConfiguration(conf); + /// a reference to the consumer is needed in statistic callback + /// although the consumer does not exist when callback is being registered + /// shared_ptr> comes to the rescue + auto consumer_weak_ptr_ptr = std::make_shared(); + updateConfiguration(conf, consumer_weak_ptr_ptr); // those settings should not be changed by users. conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished @@ -522,13 +531,20 @@ KafkaConsumerPtr StorageKafka::createConsumer(size_t consumer_number) auto consumer_impl = std::make_shared(conf); consumer_impl->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); + KafkaConsumerPtr kafka_consumer_ptr; + /// NOTE: we pass |stream_cancelled| by reference here, so the buffers should not outlive the storage. if (thread_per_consumer) { auto& stream_cancelled = tasks[consumer_number]->stream_cancelled; - return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); + kafka_consumer_ptr = std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); } - return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); + else + { + kafka_consumer_ptr = std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); + } + *consumer_weak_ptr_ptr = kafka_consumer_ptr; + return kafka_consumer_ptr; } size_t StorageKafka::getMaxBlockSize() const @@ -561,7 +577,8 @@ String StorageKafka::getConfigPrefix() const return CONFIG_KAFKA_TAG; } -void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) +void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config, + std::shared_ptr kafka_consumer_weak_ptr_ptr) { // Update consumer configuration from the configuration. Example: // @@ -641,6 +658,26 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message); }); + if (kafka_consumer_weak_ptr_ptr) + { + if (!config.has(config_prefix + "." + "statistics_interval_ms")) + { + kafka_config.set("statistics.interval.ms", "3000"); // every 3 seconds by default. set to 0 to disable. + } + + if (kafka_config.get("statistics.interval.ms") != "0") + { + kafka_config.set_stats_callback([kafka_consumer_weak_ptr_ptr](cppkafka::KafkaHandleBase &, const std::string & stat_json_string) + { + auto kafka_consumer_ptr = kafka_consumer_weak_ptr_ptr->lock(); + if (kafka_consumer_ptr) + { + kafka_consumer_ptr->setRDKafkaStat(stat_json_string); + } + }); + } + } + // Configure interceptor to change thread name // // TODO: add interceptors support into the cppkafka. @@ -951,7 +988,7 @@ void registerStorageKafka(StorageFactory & factory) "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " "and ensure you have enough threads " "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " - "See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", max_consumers); + "See also https://clickhouse.com/docs/en/integrations/kafka#tuning-performance", max_consumers); } else if (num_consumers < 1) { diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 09aa091ef18..77e1370c2b7 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -22,9 +23,12 @@ class Configuration; namespace DB { +class StorageSystemKafkaConsumers; + struct StorageKafkaInterceptors; using KafkaConsumerPtr = std::shared_ptr; +using KafkaConsumerWeakPtr = std::weak_ptr; /** Implements a Kafka queue table engine that can be used as a persistent queue / buffer, * or as a basic building block for creating pipelines with a continuous insertion / ETL. @@ -76,9 +80,19 @@ public: Names getVirtualColumnNames() const; HandleKafkaErrorMode getHandleKafkaErrorMode() const { return kafka_settings->kafka_handle_error_mode; } + struct SafeConsumers + { + std::shared_ptr storage_ptr; + std::unique_lock lock; + std::vector & consumers; + }; + + SafeConsumers getSafeConsumers() { return {shared_from_this(), std::unique_lock(mutex), all_consumers}; } + private: // Configuration and state std::unique_ptr kafka_settings; + Macros::MacroExpansionInfo macros_info; const Names topics; const String brokers; const String group; @@ -99,6 +113,7 @@ private: size_t num_created_consumers = 0; /// number of actually created consumers. std::vector consumers; /// available consumers + std::vector all_consumers; /// busy (belong to a KafkaSource) and vacant consumers std::mutex mutex; @@ -127,7 +142,12 @@ private: std::atomic shutdown_called = false; // Update Kafka configuration with values from CH user configuration. - void updateConfiguration(cppkafka::Configuration & kafka_config); + void updateConfiguration(cppkafka::Configuration & kafka_config, std::shared_ptr); + void updateConfiguration(cppkafka::Configuration & kafka_config) + { + updateConfiguration(kafka_config, std::make_shared()); + } + String getConfigPrefix() const; void threadFunc(size_t idx); @@ -140,6 +160,7 @@ private: bool streamToViews(); bool checkDependencies(const StorageID & table_id); + }; } diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index e163400f2af..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -32,11 +32,8 @@ public: void onFinish() override { - UInt128 key; - String key_str; - - new_hash->get128(key); - key_str = getHexUIntLowercase(key); + const auto key = new_hash->get128(); + const auto key_str = getHexUIntLowercase(key); std::lock_guard lock(storage.mutex); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 173bb128c4a..5719529533e 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -681,7 +681,6 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) { SipHash hash; - UInt128 key; BlocksPtr new_blocks = std::make_shared(); BlocksMetadataPtr new_blocks_metadata = std::make_shared(); @@ -713,7 +712,7 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) new_blocks->push_back(block); } - hash.get128(key); + const auto key = hash.get128(); /// Update blocks only if hash keys do not match /// NOTE: hash could be different for the same result diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index 0f4af57fc8e..495cebb080f 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -50,13 +50,9 @@ public: /// Calculate key from path to file and offset. static UInt128 hash(const String & path_to_file) { - UInt128 key; - SipHash hash; hash.update(path_to_file.data(), path_to_file.size() + 1); - hash.get128(key); - - return key; + return hash.get128(); } template diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h index 310890eba1e..5092fbdd864 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h @@ -9,6 +9,9 @@ namespace DB { +static constexpr auto DISTANCE_FUNCTION_L2 = "L2Distance"; +static constexpr auto DISTANCE_FUNCTION_COSINE = "cosineDistance"; + /// Approximate Nearest Neighbour queries have a similar structure: /// - reference vector from which all distances are calculated /// - metric name (e.g L2Distance, LpDistance, etc.) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index e1921f45eda..618dbd845ae 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -331,6 +331,7 @@ void DataPartStorageOnDiskBase::backup( const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const @@ -350,7 +351,7 @@ void DataPartStorageOnDiskBase::backup( temp_dir_it = temp_dirs->emplace(disk, std::make_shared(disk, "tmp/")).first; temp_dir_owner = temp_dir_it->second; - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_part_dir = temp_dir / part_path_in_backup.relative_path(); disk->createDirectories(temp_part_dir); } @@ -382,7 +383,7 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { - backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, copy_encrypted)); + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); continue; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 648bc908f59..6c2987e4971 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -55,6 +55,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const override; diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index 91e831270d4..5b798ecc8a5 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 8dbf5caa168..19af6085547 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -221,6 +221,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 6d7b6b39a40..6564fc67767 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1780,7 +1780,8 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) part_is_probably_removed_from_disk = true; } -DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const +DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/, + const DiskTransactionPtr & disk_transaction) const { /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. @@ -1795,7 +1796,8 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, - .make_source_readonly = true + .make_source_readonly = true, + .external_transaction = disk_transaction }; return getDataPartStorage().freeze( storage.relative_data_path, @@ -1983,6 +1985,12 @@ IndexSize IMergeTreeDataPart::getSecondaryIndexSize(const String & secondary_ind return ColumnSize{}; } +bool IMergeTreeDataPart::hasSecondaryIndex(const String & index_name) const +{ + auto file_name = INDEX_FILE_PREFIX + index_name; + return checksums.has(file_name + ".idx") || checksums.has(file_name + ".idx2"); +} + void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const { for (const auto & [column_name, size] : columns_sizes) @@ -2052,14 +2060,8 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const hash.update(token.data(), token.size()); } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - - return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]); + const auto hash_value = hash.get128(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const String & file_name) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index af6906e004d..a8e053a9c7b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -89,7 +89,7 @@ public: virtual MergeTreeReaderPtr getReader( const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -122,6 +122,9 @@ public: /// Otherwise return information about secondary index size on disk. IndexSize getSecondaryIndexSize(const String & secondary_index_name) const; + /// Returns true if there is materialized index with specified name in part. + bool hasSecondaryIndex(const String & index_name) const; + /// Return information about column size on disk for all columns in part ColumnSize getTotalColumnsSize() const { return total_columns_size; } @@ -368,7 +371,8 @@ public: virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists); /// Makes clone of a part in detached/ directory via hard links - virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; + virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index f9b97a6a05d..4bb8c400691 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -35,7 +35,7 @@ IMergeTreeReader::IMergeTreeReader( , uncompressed_cache(uncompressed_cache_) , mark_cache(mark_cache_) , settings(settings_) - , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(storage_snapshot_) , all_mark_ranges(all_mark_ranges_) , alter_conversions(data_part_info_for_read->getAlterConversions()) /// For wide parts convert plain arrays of Nested to subcolumns @@ -71,7 +71,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, metadata_snapshot); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); @@ -110,7 +110,10 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } auto dag = DB::evaluateMissingDefaults( - additional_columns, requested_columns, metadata_snapshot->getColumns(), data_part_info_for_read->getContext()); + additional_columns, requested_columns, + storage_snapshot->metadata->getColumns(), + data_part_info_for_read->getContext()); + if (dag) { dag->addMaterializingOutputActions(); @@ -216,7 +219,7 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const } } -IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const +IMergeTreeReader::ColumnNameLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const { auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage) { @@ -238,11 +241,11 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage); size_t max_matched_streams = 0; - ColumnPositionLevel position_level; + ColumnNameLevel name_level; /// Find column that has maximal number of matching /// offsets columns with required_column. - for (const auto & part_column : data_part_info_for_read->getColumns()) + for (const auto & part_column : Nested::convertToSubcolumns(data_part_info_for_read->getColumns())) { auto name_in_storage = Nested::extractTableName(part_column.name); if (name_in_storage != required_name_in_storage) @@ -261,14 +264,14 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con it = current_it; } - if (i && (!position_level || i > max_matched_streams)) + if (i && (!name_level || i > max_matched_streams)) { max_matched_streams = i; - position_level.emplace(*data_part_info_for_read->getColumnPosition(part_column.name), it->second); + name_level.emplace(part_column.name, it->second); } } - return position_level; + return name_level; } void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index fcab35fb4c2..dafe7b2d4ba 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -24,7 +24,7 @@ public: IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -92,22 +92,23 @@ protected: MergeTreeReaderSettings settings; - StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; MarkRanges all_mark_ranges; /// Position and level (of nesting). - using ColumnPositionLevel = std::optional>; + using ColumnNameLevel = std::optional>; + /// In case of part of the nested column does not exists, offsets should be /// read, but only the offsets for the current column, that is why it /// returns pair of size_t, not just one. - ColumnPositionLevel findColumnForOffsets(const NameAndTypePair & column) const; + ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const; NameSet partially_read_columns; -private: /// Alter conversions, which must be applied on fly if required AlterConversionsPtr alter_conversions; +private: /// Columns that are requested to read. NamesAndTypesList requested_columns; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 3f02a6b197e..42731bac19b 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -764,7 +764,9 @@ KeyCondition::KeyCondition( ++key_index; } - auto filter_node = buildFilterNode(query, additional_filter_asts); + ASTPtr filter_node; + if (query) + filter_node = buildFilterNode(query, additional_filter_asts); if (!filter_node) { diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index f694ecab8e3..2e48892563b 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -36,7 +36,7 @@ void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String p if (code == Coordination::Error::ZNONODE) return; else if (code != Coordination::Error::ZOK) - throw KeeperException(code, path); + throw KeeperException::fromPath(code, path); Coordination::Requests ops; diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index c6c197919f4..bd8546f04cc 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -83,12 +83,12 @@ size_t MarkRanges::getNumberOfMarks() const void MarkRanges::serialize(WriteBuffer & out) const { - writeIntBinary(this->size(), out); + writeBinaryLittleEndian(this->size(), out); for (const auto & [begin, end] : *this) { - writeIntBinary(begin, out); - writeIntBinary(end, out); + writeBinaryLittleEndian(begin, out); + writeBinaryLittleEndian(end, out); } } @@ -100,13 +100,13 @@ String MarkRanges::describe() const void MarkRanges::deserialize(ReadBuffer & in) { size_t size = 0; - readIntBinary(size, in); + readBinaryLittleEndian(size, in); this->resize(size); for (size_t i = 0; i < size; ++i) { - readIntBinary((*this)[i].begin, in); - readIntBinary((*this)[i].end, in); + readBinaryLittleEndian((*this)[i].begin, in); + readBinaryLittleEndian((*this)[i].end, in); } } diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 1cd2d11da50..8fbb163384e 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -1,20 +1,16 @@ +#include #include #include -#include +#include #include #include #include -#include namespace DB { - -MergeListElement::MergeListElement( - const StorageID & table_id_, - FutureMergedMutatedPartPtr future_part, - const ContextPtr & context) +MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMutatedPartPtr future_part, const ContextPtr & context) : table_id{table_id_} , partition_id{future_part->part_info.partition_id} , result_part_name{future_part->name} @@ -40,6 +36,10 @@ MergeListElement::MergeListElement( { source_data_version = future_part->parts[0]->info.getDataVersion(); is_mutation = (result_part_info.getDataVersion() != source_data_version); + + WriteBufferFromString out(partition); + const auto & part = future_part->parts[0]; + part->partition.serializeText(part->storage, out, {}); } thread_group = ThreadGroup::createForBackgroundProcess(context); @@ -53,6 +53,7 @@ MergeInfo MergeListElement::getInfo() const res.result_part_name = result_part_name; res.result_part_path = result_part_path; res.partition_id = partition_id; + res.partition = partition; res.is_mutation = is_mutation; res.elapsed = watch.elapsedSeconds(); res.progress = progress.load(std::memory_order_relaxed); diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index ffa87e75505..d40af6abf43 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -35,6 +35,7 @@ struct MergeInfo Array source_part_names; Array source_part_paths; std::string partition_id; + std::string partition; bool is_mutation; Float64 elapsed; Float64 progress; @@ -67,6 +68,7 @@ struct MergeListElement : boost::noncopyable { const StorageID table_id; std::string partition_id; + std::string partition; const std::string result_part_name; const std::string result_part_path; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 3eba9a9de24..d7836ac01b0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -191,7 +191,6 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read() } void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) { @@ -206,7 +205,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( else { reader = task->data_part->getReader( - task->task_columns.columns, metadata_snapshot, task->mark_ranges, + task->task_columns.columns, storage_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), task->alter_conversions, reader_settings, value_size_map, profile_callback); } @@ -222,8 +221,8 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( { initializeMergeTreePreReadersForPart( task->data_part, task->alter_conversions, - task->task_columns, metadata_snapshot, - task->mark_ranges, value_size_map, profile_callback); + task->task_columns, task->mark_ranges, + value_size_map, profile_callback); } } @@ -231,18 +230,17 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForPart( const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) { reader = data_part->getReader( - task_columns.columns, metadata_snapshot, mark_ranges, + task_columns.columns, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback); initializeMergeTreePreReadersForPart( - data_part, alter_conversions, task_columns, metadata_snapshot, + data_part, alter_conversions, task_columns, mark_ranges, value_size_map, profile_callback); } @@ -250,7 +248,6 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) @@ -262,7 +259,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( { pre_reader_for_step.push_back( data_part->getReader( - {LightweightDeleteDescription::FILTER_COLUMN}, metadata_snapshot, + {LightweightDeleteDescription::FILTER_COLUMN}, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback)); } @@ -271,7 +268,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( { pre_reader_for_step.push_back( data_part->getReader( - pre_columns_per_step, metadata_snapshot, mark_ranges, + pre_columns_per_step, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback)); } diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 7b6dc50060a..42043c03e85 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -120,7 +120,6 @@ protected: /// Sets up data readers for each step of prewhere and where void initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); @@ -128,7 +127,6 @@ protected: const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); @@ -207,7 +205,6 @@ private: const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 013a9d6923c..2c09ab9725c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -187,8 +187,8 @@ static void checkSuspiciousIndices(const ASTFunction * index_function) std::unordered_set unique_index_expression_hashes; for (const auto & child : index_function->arguments->children) { - IAST::Hash hash = child->getTreeHash(); - UInt64 first_half_of_hash = hash.first; + const IAST::Hash hash = child->getTreeHash(); + const auto & first_half_of_hash = hash.low64; if (!unique_index_expression_hashes.emplace(first_half_of_hash).second) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1217,10 +1217,10 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; LOG_ERROR(log, - "Detaching broken part {}{} (size: {}). " + "Detaching broken part {} (size: {}). " "If it happened after update, it is likely because of backward incompatibility. " "You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name, part_size_str); + fs::path(getFullPathOnDisk(part_disk_ptr)) / part_name, part_size_str); }; try @@ -2328,7 +2328,7 @@ size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) removePartsFinally(parts_to_remove); /// This is needed to close files to avoid they reside on disk after being deleted. /// NOTE: we can drop files from cache more selectively but this is good enough. - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); return parts_to_remove.size(); } @@ -2619,8 +2619,50 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (detached_parts.empty()) return 0; - PartsTemporaryRename renamed_parts(*this, "detached/"); + auto get_last_touched_time = [&](const DetachedPartInfo & part_info) -> time_t + { + auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; + time_t last_change_time = part_info.disk->getLastChanged(path); + time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); + return std::max(last_change_time, last_modification_time); + }; + time_t ttl_seconds = getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; + + size_t unfinished_deleting_parts = 0; + time_t current_time = time(nullptr); + for (const auto & part_info : detached_parts) + { + if (!part_info.dir_name.starts_with("deleting_")) + continue; + + time_t startup_time = current_time - static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); + time_t last_touch_time = get_last_touched_time(part_info); + + /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED) + bool had_restart = last_touch_time < startup_time; + bool ttl_expired = last_touch_time + ttl_seconds <= current_time; + if (!had_restart && !ttl_expired) + continue; + + /// We were trying to delete this detached part but did not finish deleting, probably because the server crashed + LOG_INFO(log, "Removing detached part {} that we failed to remove previously", part_info.dir_name); + try + { + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / part_info.dir_name / "", part_info.dir_name); + ++unfinished_deleting_parts; + } + catch (...) + { + tryLogCurrentException(log); + } + } + + if (!getSettings()->merge_tree_enable_clear_old_broken_detached) + return unfinished_deleting_parts; + + const auto full_path = fs::path(relative_data_path) / "detached"; + size_t removed_count = 0; for (const auto & part_info : detached_parts) { if (!part_info.valid_name || part_info.prefix.empty()) @@ -2635,31 +2677,24 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (!can_be_removed_by_timeout) continue; - time_t current_time = time(nullptr); - ssize_t threshold = current_time - getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; - auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; - time_t last_change_time = part_info.disk->getLastChanged(path); - time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); - time_t last_touch_time = std::max(last_change_time, last_modification_time); + ssize_t threshold = current_time - ttl_seconds; + time_t last_touch_time = get_last_touched_time(part_info); if (last_touch_time == 0 || last_touch_time >= threshold) continue; - renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk); - } + const String & old_name = part_info.dir_name; + String new_name = "deleting_" + part_info.dir_name; + part_info.disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); - LOG_INFO(log, "Will clean up {} detached parts", renamed_parts.old_and_new_names.size()); - - renamed_parts.tryRenameAll(); - - for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) - { - removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); - old_name.clear(); + ++removed_count; } - return renamed_parts.old_and_new_names.size(); + LOG_INFO(log, "Cleaned up {} detached parts", removed_count); + + return removed_count + unfinished_deleting_parts; } size_t MergeTreeData::clearOldWriteAheadLogs() @@ -2799,7 +2834,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ } if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// TODO: remove const_cast for (const auto & part : data_parts_by_info) @@ -2840,9 +2875,9 @@ void MergeTreeData::dropAllData() } /// Tables in atomic databases have UUID and stored in persistent locations. - /// No need to drop caches (that are keyed by filesystem path) because collision is not possible. + /// No need to clear caches (that are keyed by filesystem path) because collision is not possible. if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls. NameSet part_names_failed; @@ -3166,6 +3201,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } + if (command.type == AlterCommand::MODIFY_QUERY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ALTER MODIFY QUERY is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -4031,7 +4070,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); DataPartsLock lock = lockParts(); part_to_detach->is_unexpected_local_part = true; @@ -5148,12 +5187,12 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, const ContextPtr & local_context) { MergeTreeData::PartsBackupEntries res; std::map> temp_dirs; TableLockHolder table_lock; - ReadSettings read_settings = local_context->getBackupReadSettings(); for (const auto & part : data_parts) { @@ -5187,6 +5226,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5199,6 +5239,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( projection_part->getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5266,7 +5307,7 @@ public: auto it = temp_dirs.find(disk); if (it == temp_dirs.end()) it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/")).first; - return it->second->getPath(); + return it->second->getRelativePath(); } private: @@ -5791,18 +5832,21 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String source_dir = "detached/"; - std::map name_to_disk; - /// Let's compose a list of parts that should be added. if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - auto disk = getDiskForDetachedPart(part_id); - renamed_parts.addPart(part_id, "attaching_" + part_id, disk); - - if (MergeTreePartInfo::tryParsePartName(part_id, format_version)) - name_to_disk[part_id] = getDiskForDetachedPart(part_id); + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_id); + } + else + { + auto disk = getDiskForDetachedPart(part_id); + renamed_parts.addPart(part_id, "attaching_" + part_id, disk); + } } else { @@ -5819,6 +5863,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_info.dir_name)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_info.dir_name); + continue; + } LOG_DEBUG(log, "Found part {}", part_info.dir_name); active_parts.add(part_info.dir_name); } @@ -5829,6 +5879,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + if (containing_part.empty()) + continue; LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); @@ -8429,7 +8481,7 @@ void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) } } -MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( +std::pair MergeTreeData::createEmptyPart( MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn) { @@ -8448,6 +8500,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( ReservationPtr reservation = reserveSpacePreferringTTLRules(metadata_snapshot, 0, move_ttl_infos, time(nullptr), 0, true); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); + auto tmp_dir_holder = getTemporaryPartDirectoryHolder(EMPTY_PART_TMP_PREFIX + new_part_name); auto new_data_part = getDataPartBuilder(new_part_name, data_part_volume, EMPTY_PART_TMP_PREFIX + new_part_name) .withBytesAndRowsOnDisk(0, 0) .withPartInfo(new_part_info) @@ -8507,7 +8560,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( out.finalizePart(new_data_part, sync_on_insert); new_data_part_storage->precommitTransaction(); - return new_data_part; + return std::make_pair(std::move(new_data_part), std::move(tmp_dir_holder)); } bool MergeTreeData::allowRemoveStaleMovingParts() const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5e6b043c31c..e4801cffa36 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -936,7 +936,9 @@ public: WriteAheadLogPtr getWriteAheadLog(); constexpr static auto EMPTY_PART_TMP_PREFIX = "tmp_empty_"; - MergeTreeData::MutableDataPartPtr createEmptyPart(MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn); + std::pair createEmptyPart( + MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, + const String & new_part_name, const MergeTreeTransactionPtr & txn); MergeTreeDataFormatVersion format_version; @@ -1336,7 +1338,7 @@ protected: using PartsBackupEntries = std::vector; /// Makes backup entries to backup the parts of this table. - PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ContextPtr & local_context); + PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ReadSettings & read_settings, const ContextPtr & local_context); class RestoredPartsHolder; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 6628cd68eaf..b4d405312e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -187,15 +187,15 @@ bool MergeTreeDataPartChecksums::readV3(ReadBuffer & in) String name; Checksum sum; - readBinary(name, in); + readStringBinary(name, in); readVarUInt(sum.file_size, in); - readPODBinary(sum.file_hash, in); - readBinary(sum.is_compressed, in); + readBinaryLittleEndian(sum.file_hash, in); + readBinaryLittleEndian(sum.is_compressed, in); if (sum.is_compressed) { readVarUInt(sum.uncompressed_size, in); - readPODBinary(sum.uncompressed_hash, in); + readBinaryLittleEndian(sum.uncompressed_hash, in); } files.emplace(std::move(name), sum); @@ -223,15 +223,15 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const const String & name = it.first; const Checksum & sum = it.second; - writeBinary(name, out); + writeStringBinary(name, out); writeVarUInt(sum.file_size, out); - writePODBinary(sum.file_hash, out); - writeBinary(sum.is_compressed, out); + writeBinaryLittleEndian(sum.file_hash, out); + writeBinaryLittleEndian(sum.is_compressed, out); if (sum.is_compressed) { writeVarUInt(sum.uncompressed_size, out); - writePODBinary(sum.uncompressed_hash, out); + writeBinaryLittleEndian(sum.uncompressed_hash, out); } } } @@ -323,9 +323,7 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot hash_of_all_files.update(checksum.file_hash); } - MergeTreeDataPartChecksums::Checksum::uint128 ret; - hash_of_all_files.get128(reinterpret_cast(&ret)); - return ret; + return getSipHash128AsPair(hash_of_all_files); } void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const @@ -339,9 +337,9 @@ void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) con writeVarUInt(num_compressed_files, to); writeVarUInt(num_uncompressed_files, to); - writePODBinary(hash_of_all_files, to); - writePODBinary(hash_of_uncompressed_files, to); - writePODBinary(uncompressed_hash_of_compressed_files, to); + writeBinaryLittleEndian(hash_of_all_files, to); + writeBinaryLittleEndian(hash_of_uncompressed_files, to); + writeBinaryLittleEndian(uncompressed_hash_of_compressed_files, to); } String MinimalisticDataPartChecksums::getSerializedString() const @@ -382,9 +380,9 @@ void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in) readVarUInt(num_compressed_files, in); readVarUInt(num_uncompressed_files, in); - readPODBinary(hash_of_all_files, in); - readPODBinary(hash_of_uncompressed_files, in); - readPODBinary(uncompressed_hash_of_compressed_files, in); + readBinaryLittleEndian(hash_of_all_files, in); + readBinaryLittleEndian(hash_of_uncompressed_files, in); + readBinaryLittleEndian(uncompressed_hash_of_compressed_files, in); } void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_) @@ -415,14 +413,9 @@ void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPar } } - auto get_hash = [] (SipHash & hash, uint128 & data) - { - hash.get128(data); - }; - - get_hash(hash_of_all_files_state, hash_of_all_files); - get_hash(hash_of_uncompressed_files_state, hash_of_uncompressed_files); - get_hash(uncompressed_hash_of_compressed_files_state, uncompressed_hash_of_compressed_files); + hash_of_all_files = getSipHash128AsPair(hash_of_all_files_state); + hash_of_uncompressed_files = getSipHash128AsPair(hash_of_uncompressed_files_state); + uncompressed_hash_of_compressed_files = getSipHash128AsPair(uncompressed_hash_of_compressed_files_state); } String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 9c47608e364..c6d059498ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -30,7 +30,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -43,7 +43,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, metadata_snapshot, uncompressed_cache, + read_info, columns_to_read, storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, load_marks_threadpool, avg_value_size_hints, profile_callback); } @@ -115,7 +115,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( { marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); size_t granularity; - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 08764eedb43..2bbac766c8e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -30,7 +30,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 468747a6c36..f04e08838a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int DIRECTORY_ALREADY_EXISTS; + extern const int NOT_IMPLEMENTED; } MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( @@ -32,7 +33,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, @@ -45,7 +46,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - read_info, ptr, columns_to_read, metadata_snapshot, mark_ranges, reader_settings); + read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( @@ -138,8 +139,12 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, + const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const { + if (disk_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions"); String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); return flushToDisk(detached_path, metadata_snapshot); } @@ -167,7 +172,7 @@ IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() column.column->updateHashFast(hash); checksum.uncompressed_size = block.bytes(); - hash.get128(checksum.uncompressed_hash); + checksum.uncompressed_hash = getSipHash128AsPair(hash); return checksum; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 2698b69b38e..95f7b796f9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -19,7 +19,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -42,7 +42,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const override; std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 2d886e2058b..20600909ce4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -29,7 +29,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -41,7 +41,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( auto read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( read_info, columns_to_read, - metadata_snapshot, uncompressed_cache, + storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); } @@ -130,13 +130,13 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( MarkInCompressedFile mark; size_t granularity; - readBinary(mark.offset_in_compressed_file, *marks_reader); - readBinary(mark.offset_in_decompressed_block, *marks_reader); + readBinaryLittleEndian(mark.offset_in_compressed_file, *marks_reader); + readBinaryLittleEndian(mark.offset_in_decompressed_block, *marks_reader); ++marks_count; if (index_granularity_info_.mark_type.adaptive) { - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 0b2ffeb4b18..2076a1ec028 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -25,7 +25,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5e1da21da5b..3475130bf24 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -228,8 +228,8 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G }; - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), @@ -239,7 +239,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G prev_stream->hashing_buf.next(); } - writeIntBinary(granule.rows_to_write, marks_out); + writeBinaryLittleEndian(granule.rows_to_write, marks_out); } } @@ -270,10 +270,10 @@ void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Check { for (size_t i = 0; i < columns_list.size(); ++i) { - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } for (const auto & [_, stream] : streams_by_codec) @@ -365,8 +365,9 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & { uncompressed_size += stream->hashing_buf.count(); auto stream_hash = stream->hashing_buf.getHash(); + transformEndianness(stream_hash); uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed( - reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); + reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); } checksums.files[data_file_name].is_compressed = true; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index f57ffa5ee14..79b72d4ae39 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -313,13 +313,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) stream.compressed_hashing.next(); - writeIntBinary(stream.plain_hashing.count(), marks_out); - writeIntBinary(stream.compressed_hashing.offset(), marks_out); + writeBinaryLittleEndian(stream.plain_hashing.count(), marks_out); + writeBinaryLittleEndian(stream.compressed_hashing.offset(), marks_out); /// Actually this numbers is redundant, but we have to store them /// to be compatible with the normal .mrk2 file format if (settings.can_use_adaptive_granularity) - writeIntBinary(1UL, marks_out); + writeBinaryLittleEndian(1UL, marks_out); } size_t pos = granule.start_row; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index f9fe6f2c8ab..bcf340e0f55 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -275,10 +275,10 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre Stream & stream = *column_streams[stream_with_mark.stream_name]; WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; - writeIntBinary(stream_with_mark.mark.offset_in_compressed_file, marks_out); - writeIntBinary(stream_with_mark.mark.offset_in_decompressed_block, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_compressed_file, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_decompressed_block, marks_out); if (settings.can_use_adaptive_granularity) - writeIntBinary(rows_in_mark, marks_out); + writeBinaryLittleEndian(rows_in_mark, marks_out); } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( @@ -452,10 +452,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); - DB::readBinary(offset_in_compressed_file, *mrk_in); - DB::readBinary(offset_in_decompressed_block, *mrk_in); + readBinaryLittleEndian(offset_in_compressed_file, *mrk_in); + readBinaryLittleEndian(offset_in_decompressed_block, *mrk_in); if (settings.can_use_adaptive_granularity) - DB::readBinary(index_granularity_rows, *mrk_in); + readBinaryLittleEndian(index_granularity_rows, *mrk_in); else index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ee515106591..d4374611757 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1031,6 +1031,10 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd }; size_t num_threads = std::min(num_streams, parts.size()); + if (settings.max_threads_for_indexes) + { + num_threads = std::min(num_streams, settings.max_threads_for_indexes); + } if (num_threads <= 1) { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index ea5d64212f5..d6ba7cb3093 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -39,6 +39,7 @@ namespace ProfileEvents extern const Event MergeTreeDataProjectionWriterRows; extern const Event MergeTreeDataProjectionWriterUncompressedBytes; extern const Event MergeTreeDataProjectionWriterCompressedBytes; + extern const Event RejectedInserts; } namespace DB @@ -58,7 +59,8 @@ void buildScatterSelector( const ColumnRawPtrs & columns, PODArray & partition_num_to_first_row, IColumn::Selector & selector, - size_t max_parts) + size_t max_parts, + ContextPtr context) { /// Use generic hashed variant since partitioning is unlikely to be a bottleneck. using Data = HashMap; @@ -66,6 +68,8 @@ void buildScatterSelector( size_t num_rows = columns[0]->size(); size_t partitions_count = 0; + size_t throw_on_limit = context->getSettingsRef().throw_on_max_partitions_per_insert_block; + for (size_t i = 0; i < num_rows; ++i) { Data::key_type key = hash128(i, columns.size(), columns); @@ -75,7 +79,9 @@ void buildScatterSelector( if (inserted) { - if (max_parts && partitions_count >= max_parts) + if (max_parts && partitions_count >= max_parts && throw_on_limit) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception(ErrorCodes::TOO_MANY_PARTS, "Too many partitions for single INSERT block (more than {}). " "The limit is controlled by 'max_partitions_per_insert_block' setting. " @@ -85,6 +91,7 @@ void buildScatterSelector( "for a table is under 1000..10000. Please note, that partitioning is not intended " "to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). " "Partitions are intended for data manipulation (DROP PARTITION, etc).", max_parts); + } partition_num_to_first_row.push_back(i); it->getMapped() = partitions_count; @@ -102,6 +109,18 @@ void buildScatterSelector( if (partitions_count > 1) selector[i] = it->getMapped(); } + // Checking partitions per insert block again here outside the loop above + // so we can log the total number of partitions that would have parts created + if (max_parts && partitions_count >= max_parts && !throw_on_limit) + { + const auto & client_info = context->getClientInfo(); + Poco::Logger * log = &Poco::Logger::get("MergeTreeDataWriter"); + + LOG_WARNING(log, "INSERT query from initial_user {} (query ID: {}) inserted a block " + "that created parts in {} partitions. This is being logged " + "rather than throwing an exception as throw_on_max_partitions_per_insert_block=false.", + client_info.initial_user, client_info.initial_query_id, partitions_count); + } } /// Computes ttls and updates ttl infos @@ -240,7 +259,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( PODArray partition_num_to_first_row; IColumn::Selector selector; - buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts); + buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts, context); auto async_insert_info_with_partition = scatterAsyncInsertInfoBySelector(async_insert_info, selector, partition_num_to_first_row.size()); diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index f77cfe4fed0..13577229a75 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -25,7 +25,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - template AnnoyIndexWithSerialization::AnnoyIndexWithSerialization(size_t dimensions) : Base::AnnoyIndex(dimensions) @@ -224,9 +223,9 @@ bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return getUsefulRangesImpl(idx_granule); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return getUsefulRangesImpl(idx_granule); std::unreachable(); } @@ -289,9 +288,9 @@ MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64 MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block); std::unreachable(); } @@ -299,9 +298,9 @@ MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const MergeTreeIndexAggregatorPtr MergeTreeIndexAnnoy::createIndexAggregator() const { /// TODO: Support more metrics. Available metrics: https://github.com/spotify/annoy/blob/master/src/annoymodule.cc#L151-L171 - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block, trees); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block, trees); std::unreachable(); } @@ -313,14 +312,13 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) { - static constexpr auto default_trees = 100uz; - static constexpr auto default_distance_function = "L2Distance"; - - String distance_function = default_distance_function; + static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; + String distance_function = DEFAULT_DISTANCE_FUNCTION; if (!index.arguments.empty()) distance_function = index.arguments[0].get(); - UInt64 trees = default_trees; + static constexpr auto DEFAULT_TREES = 100uz; + UInt64 trees = DEFAULT_TREES; if (index.arguments.size() > 1) trees = index.arguments[1].get(); @@ -350,8 +348,8 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { String distance_name = index.arguments[0].get(); - if (distance_name != "L2Distance" && distance_name != "cosineDistance") - throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions 'L2Distance' and 'cosineDistance'"); + if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) + throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } /// Check data type of indexed column: diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 6b4919c545d..325df6ffb6f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -491,6 +491,10 @@ bool MergeTreeConditionInverted::traverseASTEquals( DataTypePtr const_type; if (argument.tryGetConstant(const_value, const_type)) { + auto const_data_type = WhichDataType(const_type); + if (!const_data_type.isStringOrFixedString() && !const_data_type.isArray()) + return false; + key_column_num = header.getPositionByName(map_keys_index_column_name); key_exists = true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 05ea7d15603..9868d4f37f3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -256,10 +256,6 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!key_columns.contains(name)) key_columns.insert(name); - ASTPtr ast_filter_node = buildFilterNode(query_info.query); - if (!ast_filter_node) - return; - if (context->getSettingsRef().allow_experimental_analyzer) { if (!query_info.filter_actions_dag) @@ -280,6 +276,10 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( } else { + ASTPtr ast_filter_node = buildFilterNode(query_info.query); + if (!ast_filter_node) + return; + if (checkASTUseless(ast_filter_node)) return; @@ -457,11 +457,10 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio if (arguments_size != 1) return nullptr; - auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context); - const auto & bit_wrapper_func_node = result_dag->addFunction(bit_wrapper_function, {arguments[0]}, {}); + const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node); auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {&bit_wrapper_func_node}, {}); + return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { @@ -615,6 +614,9 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) } else if (func->name == "and" || func->name == "indexHint") { + if (args.size() < 2) + return false; + auto last_arg = args.back(); args.pop_back(); @@ -634,6 +636,9 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) } else if (func->name == "or") { + if (args.size() < 2) + return false; + auto last_arg = args.back(); args.pop_back(); diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp new file mode 100644 index 00000000000..d2433517766 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -0,0 +1,377 @@ +#ifdef ENABLE_USEARCH + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; +} + +template +USearchIndexWithSerialization::USearchIndexWithSerialization(size_t dimensions) + : Base(Base::make(unum::usearch::metric_punned_t(dimensions, Metric))) +{ +} + +template +void USearchIndexWithSerialization::serialize([[maybe_unused]] WriteBuffer & ostr) const +{ + auto callback = [&ostr](void * from, size_t n) + { + ostr.write(reinterpret_cast(from), n); + return true; + }; + + Base::stream(callback); +} + +template +void USearchIndexWithSerialization::deserialize([[maybe_unused]] ReadBuffer & istr) +{ + BufferBase::Position & pos = istr.position(); + unum::usearch::memory_mapped_file_t memory_map(pos, istr.buffer().size() - istr.count()); + Base::view(std::move(memory_map)); + pos += Base::stream_length(); + + auto copy = Base::copy(); + if (!copy) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not copy usearch index"); + Base::swap(copy.index); +} + +template +size_t USearchIndexWithSerialization::getDimensions() const +{ + return Base::dimensions(); +} + +template +MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( + const String & index_name_, + const Block & index_sample_block_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , index(nullptr) +{ +} + +template +MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( + const String & index_name_, + const Block & index_sample_block_, + USearchIndexWithSerializationPtr index_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) + , index(std::move(index_)) +{ +} + +template +void MergeTreeIndexGranuleUSearch::serializeBinary(WriteBuffer & ostr) const +{ + /// Number of dimensions is required in the index constructor, + /// so it must be written and read separately from the other part + writeIntBinary(static_cast(index->getDimensions()), ostr); // write dimension + index->serialize(ostr); +} + +template +void MergeTreeIndexGranuleUSearch::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) +{ + UInt64 dimension; + readIntBinary(dimension, istr); + index = std::make_shared>(dimension); + index->deserialize(istr); +} + +template +MergeTreeIndexAggregatorUSearch::MergeTreeIndexAggregatorUSearch( + const String & index_name_, + const Block & index_sample_block_) + : index_name(index_name_) + , index_sample_block(index_sample_block_) +{ +} + +template +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorUSearch::getGranuleAndReset() +{ + auto granule = std::make_shared>(index_name, index_sample_block, index); + index = nullptr; + return granule; +} + +template +void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, size_t limit) +{ + if (*pos >= block.rows()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "The provided position is not less than the number of block rows. Position: {}, Block rows: {}.", + *pos, + block.rows()); + + size_t rows_read = std::min(limit, block.rows() - *pos); + if (rows_read == 0) + return; + + if (index_sample_block.columns() > 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column"); + + const String & index_column_name = index_sample_block.getByPosition(0).name; + ColumnPtr column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); + + if (const auto & column_array = typeid_cast(column_cut.get())) + { + const auto & data = column_array->getData(); + const auto & array = typeid_cast(data).getData(); + + if (array.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); + + const auto & offsets = column_array->getOffsets(); + const size_t num_rows = offsets.size(); + + + /// Check all sizes are the same + size_t size = offsets[0]; + for (size_t i = 0; i < num_rows - 1; ++i) + if (offsets[i + 1] - offsets[i] != size) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + + + index = std::make_shared>(size); + + /// Add all rows of block + if (!index->reserve(unum::usearch::ceil2(index->size() + num_rows))) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); + + if (auto rc = index->add(index->size(), array.data()); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + for (size_t current_row = 1; current_row < num_rows; ++current_row) + if (auto rc = index->add(index->size(), &array[offsets[current_row - 1]]); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + + } + else if (const auto & column_tuple = typeid_cast(column_cut.get())) + { + const auto & columns = column_tuple->getColumns(); + std::vector> data{column_tuple->size(), std::vector()}; + for (const auto & column : columns) + { + const auto & pod_array = typeid_cast(column.get())->getData(); + for (size_t i = 0; i < pod_array.size(); ++i) + data[i].push_back(pod_array[i]); + } + + if (data.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); + + index = std::make_shared>(data[0].size()); + + if (!index->reserve(unum::usearch::ceil2(index->size() + data.size()))) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); + + for (const auto & item : data) + if (auto rc = index->add(index->size(), item.data()); !rc) + throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array or Tuple column"); + + *pos += rows_read; +} + +MergeTreeIndexConditionUSearch::MergeTreeIndexConditionUSearch( + const IndexDescription & /*index_description*/, + const SelectQueryInfo & query, + const String & distance_function_, + ContextPtr context) + : ann_condition(query, context) + , distance_function(distance_function_) +{ +} + +bool MergeTreeIndexConditionUSearch::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); +} + +bool MergeTreeIndexConditionUSearch::alwaysUnknownOrTrue() const +{ + return ann_condition.alwaysUnknownOrTrue(distance_function); +} + +std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return getUsefulRangesImpl(idx_granule); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return getUsefulRangesImpl(idx_granule); + std::unreachable(); +} + +template +std::vector MergeTreeIndexConditionUSearch::getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const +{ + const UInt64 limit = ann_condition.getLimit(); + const UInt64 index_granularity = ann_condition.getIndexGranularity(); + const std::optional comparison_distance = ann_condition.getQueryType() == ApproximateNearestNeighborInformation::Type::Where + ? std::optional(ann_condition.getComparisonDistanceForWhereQuery()) + : std::nullopt; + + if (comparison_distance && comparison_distance.value() < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to optimize query with where without distance"); + + const std::vector reference_vector = ann_condition.getReferenceVector(); + const auto granule = std::dynamic_pointer_cast>(idx_granule); + if (granule == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); + + const USearchIndexWithSerializationPtr index = granule->index; + if (ann_condition.getDimensions() != index->dimensions()) + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "The dimension of the space in the request ({}) " + "does not match the dimension in the index ({})", + ann_condition.getDimensions(), + index->dimensions()); + + auto result = index->search(reference_vector.data(), limit); + std::vector neighbors(result.size()); /// indexes of dots which were closest to the reference vector + std::vector distances(result.size()); + result.dump_to(neighbors.data(), distances.data()); + + std::vector granule_numbers; + granule_numbers.reserve(neighbors.size()); + for (size_t i = 0; i < neighbors.size(); ++i) + { + if (comparison_distance && distances[i] > comparison_distance) + continue; + granule_numbers.push_back(neighbors[i] / index_granularity); + } + + /// make unique + std::sort(granule_numbers.begin(), granule_numbers.end()); + granule_numbers.erase(std::unique(granule_numbers.begin(), granule_numbers.end()), granule_numbers.end()); + + return granule_numbers; +} + +MergeTreeIndexUSearch::MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_) + : IMergeTreeIndex(index_) + , distance_function(distance_function_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexUSearch::createIndexGranule() const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return std::make_shared>(index.name, index.sample_block); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return std::make_shared>(index.name, index.sample_block); + std::unreachable(); +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexUSearch::createIndexAggregator() const +{ + if (distance_function == DISTANCE_FUNCTION_L2) + return std::make_shared>(index.name, index.sample_block); + else if (distance_function == DISTANCE_FUNCTION_COSINE) + return std::make_shared>(index.name, index.sample_block); + std::unreachable(); +} + +MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const +{ + return std::make_shared(index, query, distance_function, context); +}; + +MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) +{ + static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2; + String distance_function = default_distance_function; + if (!index.arguments.empty()) + distance_function = index.arguments[0].get(); + + return std::make_shared(index, distance_function); +} + +void usearchIndexValidator(const IndexDescription & index, bool /* attach */) +{ + /// Check number and type of USearch index arguments: + + if (index.arguments.size() > 1) + throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index must not have more than one parameters"); + + if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance function argument of USearch index must be of type String"); + + /// Check that the index is created on a single column + + if (index.column_names.size() != 1 || index.data_types.size() != 1) + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "USearch indexes must be created on a single column"); + + /// Check that a supported metric was passed as first argument + + if (!index.arguments.empty()) + { + String distance_name = index.arguments[0].get(); + if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) + throw Exception(ErrorCodes::INCORRECT_DATA, "USearch index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); + } + + /// Check data type of indexed column: + + auto throw_unsupported_underlying_column_exception = []() + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "USearch indexes can only be created on columns of type Array(Float32) and Tuple(Float32)"); + }; + + DataTypePtr data_type = index.sample_block.getDataTypes()[0]; + + if (const auto * data_type_array = typeid_cast(data_type.get())) + { + TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw_unsupported_underlying_column_exception(); + } + else if (const auto * data_type_tuple = typeid_cast(data_type.get())) + { + const DataTypes & inner_types = data_type_tuple->getElements(); + for (const auto & inner_type : inner_types) + { + TypeIndex nested_type_index = inner_type->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw_unsupported_underlying_column_exception(); + } + } + else + throw_unsupported_underlying_column_exception(); +} + +} + +#endif diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h new file mode 100644 index 00000000000..f1fde934fd5 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -0,0 +1,106 @@ +#pragma once + +#ifdef ENABLE_USEARCH + +#include + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" +#include +#pragma clang diagnostic pop + +namespace DB +{ + +template +class USearchIndexWithSerialization : public unum::usearch::index_dense_t +{ + using Base = unum::usearch::index_dense_t; + +public: + explicit USearchIndexWithSerialization(size_t dimensions); + void serialize(WriteBuffer & ostr) const; + void deserialize(ReadBuffer & istr); + size_t getDimensions() const; +}; + +template +using USearchIndexWithSerializationPtr = std::shared_ptr>; + +template +struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule +{ + MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_); + MergeTreeIndexGranuleUSearch(const String & index_name_, const Block & index_sample_block_, USearchIndexWithSerializationPtr index_); + + ~MergeTreeIndexGranuleUSearch() override = default; + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; + + bool empty() const override { return !index.get(); } + + const String index_name; + const Block index_sample_block; + USearchIndexWithSerializationPtr index; +}; + +template +struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator +{ + MergeTreeIndexAggregatorUSearch(const String & index_name_, const Block & index_sample_block); + ~MergeTreeIndexAggregatorUSearch() override = default; + + bool empty() const override { return !index || index->size() == 0; } + MergeTreeIndexGranulePtr getGranuleAndReset() override; + void update(const Block & block, size_t * pos, size_t limit) override; + + const String index_name; + const Block index_sample_block; + USearchIndexWithSerializationPtr index; +}; + + +class MergeTreeIndexConditionUSearch final : public IMergeTreeIndexConditionApproximateNearestNeighbor +{ +public: + MergeTreeIndexConditionUSearch(const IndexDescription & index_description, const SelectQueryInfo & query, const String & distance_function, ContextPtr context); + + ~MergeTreeIndexConditionUSearch() override = default; + + bool alwaysUnknownOrTrue() const override; + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + std::vector getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const override; + +private: + template + std::vector getUsefulRangesImpl(MergeTreeIndexGranulePtr idx_granule) const; + + const ApproximateNearestNeighborCondition ann_condition; + const String distance_function; +}; + + +class MergeTreeIndexUSearch : public IMergeTreeIndex +{ +public: + MergeTreeIndexUSearch(const IndexDescription & index_, const String & distance_function_); + + ~MergeTreeIndexUSearch() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + MergeTreeIndexAggregatorPtr createIndexAggregator() const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; + + bool mayBenefitFromIndexForIn(const ASTPtr & /*node*/) const override { return false; } + +private: + const String distance_function; +}; + + +} + + +#endif + diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 6ae96d00171..322cdd35afe 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -132,6 +132,11 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerValidator("annoy", annoyIndexValidator); #endif +#ifdef ENABLE_USEARCH + registerCreator("usearch", usearchIndexCreator); + registerValidator("usearch", usearchIndexValidator); +#endif + registerCreator("inverted", invertedIndexCreator); registerValidator("inverted", invertedIndexValidator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1ad6b082223..40128bab9d0 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -238,6 +238,11 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); void annoyIndexValidator(const IndexDescription & index, bool attach); #endif +#ifdef ENABLE_USEARCH +MergeTreeIndexPtr usearchIndexCreator(const IndexDescription& index); +void usearchIndexValidator(const IndexDescription& index, bool attach); +#endif + MergeTreeIndexPtr invertedIndexCreator(const IndexDescription& index); void invertedIndexValidator(const IndexDescription& index, bool attach); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 5c722eec380..c9b22c8a03e 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -160,7 +160,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() size_t granularity; reader->readStrict( reinterpret_cast(plain_marks.data() + i * columns_in_mark), columns_in_mark * sizeof(MarkInCompressedFile)); - readIntBinary(granularity, *reader); + readBinaryLittleEndian(granularity, *reader); } if (!reader->eof()) @@ -170,6 +170,16 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() mrk_path, marks_count, expected_uncompressed_size); } +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + std::ranges::for_each( + plain_marks, + [](auto & plain_mark) + { + plain_mark.offset_in_compressed_file = std::byteswap(plain_mark.offset_in_compressed_file); + plain_mark.offset_in_decompressed_block = std::byteswap(plain_mark.offset_in_decompressed_block); + }); +#endif + auto res = std::make_shared(plain_marks); ProfileEvents::increment(ProfileEvents::LoadedMarksCount, marks_count * columns_in_mark); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index bce33438229..ddeaf69136a 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -84,15 +84,7 @@ namespace } void operator() (const UUID & x) const { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - auto tmp_x = x.toUnderType(); - char * start = reinterpret_cast(&tmp_x); - char * end = start + sizeof(tmp_x); - std::reverse(start, end); - operator()(tmp_x); -#else operator()(x.toUnderType()); -#endif } void operator() (const IPv4 & x) const { @@ -265,12 +257,12 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const for (const Field & field : value) applyVisitor(hashing_visitor, field); - char hash_data[16]; - hash.get128(hash_data); - result.resize(32); - for (size_t i = 0; i < 16; ++i) + const auto hash_data = getSipHash128AsArray(hash); + const auto hash_size = hash_data.size(); + result.resize(hash_size * 2); + for (size_t i = 0; i < hash_size; ++i) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - writeHexByteLowercase(hash_data[16 - 1 - i], &result[2 * i]); + writeHexByteLowercase(hash_data[hash_size - 1 - i], &result[2 * i]); #else writeHexByteLowercase(hash_data[i], &result[2 * i]); #endif @@ -371,6 +363,12 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe const auto & partition_key_sample = metadata_snapshot->getPartitionKey().sample_block; size_t key_size = partition_key_sample.columns(); + // In some cases we create empty parts and then value is empty. + if (value.empty()) + { + writeCString("tuple()", out); + return; + } if (key_size == 0) { writeCString("tuple()", out); diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index e9e2138d995..901801d8187 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -97,7 +97,7 @@ std::future MergeTreePrefetchedReadPool::createPrefetchedRea Priority priority) const { auto reader = data_part.getReader( - columns, storage_snapshot->metadata, required_ranges, + columns, storage_snapshot, required_ranges, uncompressed_cache, mark_cache, alter_conversions, reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index f65e66ff52d..feefca68e7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -29,7 +29,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, @@ -130,7 +130,7 @@ void MergeTreeReaderCompact::fillColumnPositions() size_t columns_num = columns_to_read.size(); column_positions.resize(columns_num); - read_only_offsets.resize(columns_num); + columns_for_offsets.resize(columns_num); for (size_t i = 0; i < columns_num; ++i) { @@ -149,20 +149,48 @@ void MergeTreeReaderCompact::fillColumnPositions() position.reset(); } + /// If array of Nested column is missing in part, + /// we have to read its offsets if they exist. if (!position && is_array) { - /// If array of Nested column is missing in part, - /// we have to read its offsets if they exist. - auto position_level = findColumnForOffsets(column_to_read); - if (position_level.has_value()) + NameAndTypePair column_to_read_with_subcolumns = column_to_read; + auto [name_in_storage, subcolumn_name] = Nested::splitName(column_to_read.name); + + /// If it is a part of Nested, we need to get the column from + /// storage metatadata which is converted to Nested type with subcolumns. + /// It is required for proper counting of shared streams. + if (!subcolumn_name.empty()) { - column_positions[i].emplace(position_level->first); - read_only_offsets[i].emplace(position_level->second); + /// If column is renamed get the new name from storage metadata. + if (alter_conversions->columnHasNewName(name_in_storage)) + name_in_storage = alter_conversions->getColumnNewName(name_in_storage); + + if (!storage_columns_with_collected_nested) + { + auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); + auto storage_columns_list = Nested::collect(storage_snapshot->getColumns(options)); + storage_columns_with_collected_nested = ColumnsDescription(std::move(storage_columns_list)); + } + + column_to_read_with_subcolumns = storage_columns_with_collected_nested + ->getColumnOrSubcolumn( + GetColumnsOptions::All, + Nested::concatenateName(name_in_storage, subcolumn_name)); + } + + auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns); + + if (name_level_for_offsets.has_value()) + { + column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first); + columns_for_offsets[i] = name_level_for_offsets; partially_read_columns.insert(column_to_read.name); } } else + { column_positions[i] = std::move(position); + } } } @@ -203,7 +231,7 @@ size_t MergeTreeReaderCompact::readRows( auto & column = res_columns[pos]; size_t column_size_before_reading = column->size(); - readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); + readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, columns_for_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; if (read_rows_in_column != rows_to_read) @@ -239,23 +267,37 @@ size_t MergeTreeReaderCompact::readRows( void MergeTreeReaderCompact::readData( const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level) + ColumnNameLevel name_level_for_offsets) { const auto & [name, type] = name_and_type; + std::optional column_for_offsets; + + if (name_level_for_offsets.has_value()) + { + const auto & part_columns = data_part_info_for_read->getColumnsDescription(); + column_for_offsets = part_columns.getPhysical(name_level_for_offsets->first); + } adjustUpperBound(current_task_last_mark); /// Must go before seek. if (!isContinuousReading(from_mark, column_position)) seekToMark(from_mark, column_position); + /// If we read only offsets we have to read prefix anyway + /// to preserve correctness of serialization. + auto buffer_getter_for_prefix = [&](const auto &) -> ReadBuffer * + { + return data_buffer; + }; + auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer * { /// Offset stream from another column could be read, in case of current /// column does not exists (see findColumnForOffsets() in /// MergeTreeReaderCompact::fillColumnPositions()) - bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) { + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (!is_offsets) return nullptr; @@ -275,7 +317,7 @@ void MergeTreeReaderCompact::readData( /// /// Here only_offsets_level is the level of the alternative stream, /// and substream_path.size() is the level of the current stream. - if (only_offsets_level.value() < ISerialization::getArrayLevel(substream_path)) + if (name_level_for_offsets->second < ISerialization::getArrayLevel(substream_path)) return nullptr; } @@ -283,22 +325,32 @@ void MergeTreeReaderCompact::readData( }; ISerialization::DeserializeBinaryBulkStatePtr state; + ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; + ISerialization::DeserializeBinaryBulkSettings deserialize_settings; - deserialize_settings.getter = buffer_getter; deserialize_settings.avg_value_size_hint = avg_value_size_hints[name]; if (name_and_type.isSubcolumn()) { - const auto & type_in_storage = name_and_type.getTypeInStorage(); - const auto & name_in_storage = name_and_type.getNameInStorage(); + NameAndTypePair name_type_in_storage{name_and_type.getNameInStorage(), name_and_type.getTypeInStorage()}; - auto serialization = getSerializationInPart({name_in_storage, type_in_storage}); - ColumnPtr temp_column = type_in_storage->createColumn(*serialization); + /// In case of reading onlys offset use the correct serialization for reading of the prefix + auto serialization = getSerializationInPart(name_type_in_storage); + ColumnPtr temp_column = name_type_in_storage.type->createColumn(*serialization); + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); + auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); /// TODO: Avoid extra copying. if (column->empty()) @@ -308,13 +360,24 @@ void MergeTreeReaderCompact::readData( } else { + /// In case of reading only offsets use the correct serialization for reading the prefix auto serialization = getSerializationInPart(name_and_type); + + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr); } /// The buffer is left in inconsistent state after reading single offsets - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) last_read_granule.reset(); else last_read_granule.emplace(from_mark, column_position); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index f180d7508f7..cf706526363 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,7 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -52,12 +52,19 @@ private: MergeTreeMarksLoader marks_loader; + /// Storage columns with collected separate arrays of Nested to columns of Nested type. + /// They maybe be needed for finding offsets of missed Nested columns in parts. + /// They are rarely used and are heavy to initialized, so we create them + /// only on demand and cache in this field. + std::optional storage_columns_with_collected_nested; + /// Positions of columns in part structure. using ColumnPositions = std::vector>; ColumnPositions column_positions; + /// Should we read full column or only it's offsets. /// Element of the vector is the level of the alternative stream. - std::vector> read_only_offsets; + std::vector columns_for_offsets; /// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream. std::optional last_right_offset; @@ -68,8 +75,8 @@ private: void seekToMark(size_t row_index, size_t column_index); void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, - size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level); + size_t current_task_last_mark, size_t column_position, + size_t rows_to_read, ColumnNameLevel name_level_for_offsets); /// Returns maximal value of granule size in compressed file from @mark_ranges. /// This value is used as size of read buffer. @@ -84,7 +91,6 @@ private: ReadBufferFromFileBase::ProfileCallback profile_callback; clockid_t clock_type; - bool initialized = false; }; diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index fed8032fb17..bacd86511f5 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,13 +19,13 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, nullptr, nullptr, mark_ranges_, @@ -42,7 +42,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( { if (auto offsets_position = findColumnForOffsets(column_to_read)) { - positions_for_offsets[column_to_read.name] = offsets_position->first; + positions_for_offsets[column_to_read.name] = *data_part_info_for_read->getColumnPosition(offsets_position->first); partially_read_columns.insert(column_to_read.name); } } diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index cb67bc46eae..e26a98f0916 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,7 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 140fb6da5df..a0fe2dc63b4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,7 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -35,7 +35,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index c31b1baf32e..2a850cc2814 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,7 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ce4ba69c08c..e5a45ad5554 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -65,7 +65,7 @@ void MergeTreeSelectAlgorithm::initializeReaders() initializeMergeTreeReadersForPart( data_part, alter_conversions, task_columns, - storage_snapshot->getMetadataForQuery(), all_mark_ranges, {}, {}); + all_mark_ranges, /*value_size_map=*/ {}, /*profile_callback=*/ {}); } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 5a6d59bf0be..8a9faa5cee4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -138,6 +138,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( ReadSettings read_settings; if (read_with_direct_io) read_settings.direct_io_threshold = 1; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; MergeTreeReaderSettings reader_settings = { @@ -150,7 +151,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot->metadata, + columns_for_reader, storage_snapshot, *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), alter_conversions, reader_settings, {}, {}); } diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 01094d65ac5..892ae9ead87 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -45,8 +45,6 @@ void MergeTreeThreadSelectAlgorithm::finalizeNewTask() /// Allows pool to reduce number of threads in case of too slow reads. auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); }; - const auto & metadata_snapshot = storage_snapshot->metadata; - IMergeTreeReader::ValueSizeMap value_size_map; if (reader && part_name != last_read_part_name) @@ -57,7 +55,7 @@ void MergeTreeThreadSelectAlgorithm::finalizeNewTask() /// task->reader.valid() means there is a prefetched reader in this test, use it. const bool init_new_readers = !reader || task->reader.valid() || part_name != last_read_part_name; if (init_new_readers) - initializeMergeTreeReadersForCurrentTask(metadata_snapshot, value_size_map, profile_callback); + initializeMergeTreeReadersForCurrentTask(value_size_map, profile_callback); last_read_part_name = part_name; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 5efb7286685..466dbb45a4d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -226,7 +226,7 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet return false; } -void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const +void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, std::set & pk_positions) const { auto function_node_optional = node.toFunctionNodeOrNull(); @@ -237,7 +237,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree for (size_t i = 0; i < arguments_size; ++i) { auto argument = function_node_optional->getArgumentAt(i); - analyzeImpl(res, argument, where_optimizer_context); + analyzeImpl(res, argument, where_optimizer_context, pk_positions); } } else @@ -270,6 +270,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.good = cond.viable; /// Find min position in PK of any column that is used in this condition. cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + pk_positions.emplace(cond.min_position_in_primary_key); } res.emplace_back(std::move(cond)); @@ -281,7 +282,29 @@ MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const RPNBu const WhereOptimizerContext & where_optimizer_context) const { Conditions res; - analyzeImpl(res, node, where_optimizer_context); + std::set pk_positions; + analyzeImpl(res, node, where_optimizer_context, pk_positions); + + /// E.g., if the primary key is (a, b, c) but the condition is a = 1 and c = 1, + /// we should only put (a = 1) to the tail of PREWHERE, + /// and treat (c = 1) as a normal column. + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + Int64 min_valid_pk_pos = -1; + for (auto pk_pos : pk_positions) + { + if (pk_pos != min_valid_pk_pos + 1) + break; + min_valid_pk_pos = pk_pos; + } + for (auto & cond : res) + { + if (cond.min_position_in_primary_key > min_valid_pk_pos) + cond.min_position_in_primary_key = std::numeric_limits::max() - 1; + } + LOG_TRACE(log, "The min valid primary key position for moving to the tail of PREWHERE is {}", min_valid_pk_pos); + } + return res; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index fb5e84b67c6..dd9dc803f35 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -108,7 +108,7 @@ private: std::optional optimizeImpl(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; - void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; + void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, std::set & pk_positions) const; /// Transform conjunctions chain in WHERE expression to Conditions list. Conditions analyze(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 491c36433ca..865c2a87240 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -453,6 +453,7 @@ static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snaps /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream static std::set getIndicesToRecalculate( + const MergeTreeDataPartPtr & source_part, QueryPipelineBuilder & builder, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, @@ -463,10 +464,15 @@ static std::set getIndicesToRecalculate( std::set indices_to_recalc; ASTPtr indices_recalc_expr_list = std::make_shared(); const auto & indices = metadata_snapshot->getSecondaryIndices(); + bool is_full_part_storage = isFullPartStorage(source_part->getDataPartStorage()); for (const auto & index : indices) { - if (materialized_indices.contains(index.name)) + bool need_recalculate = + materialized_indices.contains(index.name) + || (!is_full_part_storage && source_part->hasSecondaryIndex(index.name)); + + if (need_recalculate) { if (indices_to_recalc.insert(index_factory.get(index)).second) { @@ -496,15 +502,23 @@ static std::set getIndicesToRecalculate( } static std::set getProjectionsToRecalculate( + const MergeTreeDataPartPtr & source_part, const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_projections) { std::set projections_to_recalc; + bool is_full_part_storage = isFullPartStorage(source_part->getDataPartStorage()); + for (const auto & projection : metadata_snapshot->getProjections()) { - if (materialized_projections.contains(projection.name)) + bool need_recalculate = + materialized_projections.contains(projection.name) + || (!is_full_part_storage && source_part->hasProjection(projection.name)); + + if (need_recalculate) projections_to_recalc.insert(&projection); } + return projections_to_recalc; } @@ -1279,14 +1293,20 @@ private: removed_indices.insert(command.column_name); } + bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); const auto & indices = ctx->metadata_snapshot->getSecondaryIndices(); + MergeTreeIndices skip_indices; for (const auto & idx : indices) { if (removed_indices.contains(idx.name)) continue; - if (ctx->materialized_indices.contains(idx.name)) + bool need_recalculate = + ctx->materialized_indices.contains(idx.name) + || (!is_full_part_storage && ctx->source_part->hasSecondaryIndex(idx.name)); + + if (need_recalculate) { skip_indices.push_back(MergeTreeIndexFactory::instance().get(idx)); } @@ -1319,7 +1339,11 @@ private: if (removed_projections.contains(projection.name)) continue; - if (ctx->materialized_projections.contains(projection.name)) + bool need_recalculate = + ctx->materialized_projections.contains(projection.name) + || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); + + if (need_recalculate) { ctx->projections_to_build.push_back(&projection); } @@ -1841,6 +1865,7 @@ bool MutateTask::prepare() context_for_reading->setSetting("max_threads", 1); context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); + context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -1920,9 +1945,16 @@ bool MutateTask::prepare() else /// TODO: check that we modify only non-key columns in this case. { ctx->indices_to_recalc = MutationHelpers::getIndicesToRecalculate( - ctx->mutating_pipeline_builder, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices); + ctx->source_part, + ctx->mutating_pipeline_builder, + ctx->metadata_snapshot, + ctx->context, + ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(ctx->metadata_snapshot, ctx->materialized_projections); + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); ctx->files_to_skip = MutationHelpers::collectFilesToSkip( ctx->source_part, diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 3b382b7b32d..76b8080f64c 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -684,7 +684,7 @@ CancellationCode PartMovesBetweenShardsOrchestrator::killPartMoveToShard(const U continue; } else - throw Coordination::Exception(code, entry.znode_path); + throw Coordination::Exception::fromPath(code, entry.znode_path); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 07cfced8362..b72c148a4e8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -149,8 +149,7 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() /// do it under share lock cleaned_other += storage.clearOldWriteAheadLogs(); cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); - if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); + cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 88f4a3ec66f..24d907dbad6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -12,9 +12,7 @@ static std::array getSipHash(const String & str) { SipHash hash; hash.update(str.data(), str.size()); - std::array result; - hash.get128(result.data()); - return result; + return getSipHash128AsArray(hash); } ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 21d5597e614..8827368f1d5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -77,15 +77,6 @@ void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) virtual_parts.add(part_name, nullptr); } - /// Drop parts can negatively affect virtual parts. So when we load parts - /// from zookeeper we can break invariant with virtual parts. To fix this we - /// have it here. - for (const LogEntryPtr & entry : queue) - { - if (entry->isDropPart(format_version)) - virtual_parts.removePartAndCoveredParts(*entry->getDropRange(format_version)); - } - LOG_TRACE(log, "Queue initialized"); } @@ -866,7 +857,7 @@ ActiveDataPartSet getPartNamesToMutate( } -void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) +void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback) { std::lock_guard lock(update_mutations_mutex); @@ -2169,7 +2160,7 @@ CommittingBlocks BaseMergePredicate::getCommitti { auto & response = locks_children[i]; if (response.error != Coordination::Error::ZOK && !partition_ids_hint) - throw Coordination::Exception(response.error, paths[i]); + throw Coordination::Exception::fromPath(response.error, paths[i]); if (response.error != Coordination::Error::ZOK) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 611866877d8..d5d85e58cb5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -335,7 +335,7 @@ public: /// Load new mutation entries. If something new is loaded, schedule storage.merge_selecting_task. /// If watch_callback is not empty, will call it when new mutations appear in ZK. - void updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}); + void updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback = {}); /// Remove a mutation from ZooKeeper and from the local set. Returns the removed entry or nullptr /// if it could not be found. Called during KILL MUTATION query execution. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 0db3464a637..d5b23b14524 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -254,14 +254,9 @@ namespace for (const auto & col : cols) col->updateHashWithValue(j, hash); } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1])); + const auto hash_value = hash.get128(); + block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.items[0]) + "_" + DB::toString(hash_value.items[1])); } else block_id_vec.push_back(partition_id + "_" + std::string(token)); @@ -351,7 +346,7 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const if (active_replicas < quorum_size) { if (Coordination::isHardwareError(keeper_error)) - throw Coordination::Exception("Failed to check number of alive replicas", keeper_error); + throw Coordination::Exception::fromMessage(keeper_error, "Failed to check number of alive replicas"); throw Exception(ErrorCodes::TOO_FEW_LIVE_REPLICAS, "Number of alive replicas ({}) is less than requested quorum ({}/{}).", active_replicas, quorum_size, replicas_number); @@ -633,8 +628,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa delayed_chunk.reset(); } -template -void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) +template<> +bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) { /// NOTE: No delay in this case. That's Ok. auto origin_zookeeper = storage.getZooKeeper(); @@ -649,8 +644,13 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, BlockIDsType(), replicas_num, true); - PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot())); + String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; + + /// Set a special error code if the block is duplicate + int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; + PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error)); + return deduplicated; } catch (...) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 868590efa25..4a192a822f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -56,7 +56,7 @@ public: String getName() const override { return "ReplicatedMergeTreeSink"; } /// For ATTACHing existing data on filesystem. - void writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews bool lastBlockIsDuplicate() const override diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index 512c0800de7..e46c3f974c7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -159,7 +159,7 @@ public: void setKeeperError(Coordination::Error code, std::string message) { - setKeeperError(std::make_exception_ptr(zkutil::KeeperException(message, code)), code, message); + setKeeperError(std::make_exception_ptr(zkutil::KeeperException::createDeprecated(message, code)), code, message); } template diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 1967357a840..71229f6656e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -201,20 +201,20 @@ static IMergeTreeDataPart::Checksums checkDataPart( continue; auto checksum_it = checksums_data.files.find(file_name); - /// Skip files that we already calculated. Also skip metadata files that are not checksummed. if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name)) { auto txt_checksum_it = checksums_txt_files.find(file_name); - if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0) + if ((txt_checksum_it != checksums_txt_files.end() && txt_checksum_it->second.is_compressed)) + { + /// If we have both compressed and uncompressed in txt or its .cmrk(2/3) or .cidx, then calculate them + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + } + else { /// The file is not compressed. checksum_file(file_name); } - else /// If we have both compressed and uncompressed in txt, then calculate them - { - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - } } } diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 6faacf3c066..4adde07da97 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -125,9 +125,6 @@ void localBackup( size_t try_no = 0; const size_t max_tries = 10; - CleanupOnFail cleanup(disk_transaction ? std::function([]{}) : - [disk, destination_path]() { disk->removeRecursive(destination_path); }); - /** Files in the directory can be permanently added and deleted. * If some file is deleted during an attempt to make a backup, then try again, * because it's important to take into account any new files that might appear. @@ -136,10 +133,30 @@ void localBackup( { try { - if (copy_instead_of_hardlinks && !disk_transaction) - disk->copyDirectoryContent(source_path, disk, destination_path); - else + if (disk_transaction) + { localBackupImpl(disk, disk_transaction.get(), source_path, destination_path, make_source_readonly, 0, max_level, copy_instead_of_hardlinks, files_to_copy_intead_of_hardlinks); + } + else if (copy_instead_of_hardlinks) + { + CleanupOnFail cleanup([disk, destination_path]() { disk->removeRecursive(destination_path); }); + disk->copyDirectoryContent(source_path, disk, destination_path); + cleanup.success(); + } + else + { + std::function cleaner; + if (disk->supportZeroCopyReplication()) + /// Note: this code will create garbage on s3. We should always remove `copy_instead_of_hardlinks` files. + /// The third argument should be a list of exceptions, but (looks like) it is ignored for keep_all_shared_data = true. + cleaner = [disk, destination_path]() { disk->removeSharedRecursive(destination_path, /*keep_all_shared_data*/ true, {}); }; + else + cleaner = [disk, destination_path]() { disk->removeRecursive(destination_path); }; + + CleanupOnFail cleanup(std::move(cleaner)); + localBackupImpl(disk, disk_transaction.get(), source_path, destination_path, make_source_readonly, 0, max_level, false, files_to_copy_intead_of_hardlinks); + cleanup.success(); + } } catch (const DB::ErrnoException & e) { @@ -166,8 +183,6 @@ void localBackup( break; } - - cleanup.success(); } } diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 48825361a16..cddf252a7e1 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -356,9 +356,8 @@ const ProjectionDescription & ProjectionsDescription::get(const String & project auto it = map.find(projection_name); if (it == map.end()) { - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } return *(it->second); @@ -401,9 +400,8 @@ void ProjectionsDescription::remove(const String & projection_name, bool if_exis if (if_exists) return; - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } projections.erase(it->second); diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp new file mode 100644 index 00000000000..4624566a517 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -0,0 +1,353 @@ +#include "IO/VarInt.h" +#include "config.h" + +#if USE_AWS_S3 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TIMEOUT_EXCEEDED; +} + +namespace +{ + UInt64 getCurrentTime() + { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::read(ReadBuffer & in) +{ + files = {}; + if (in.eof()) + return; + + size_t files_num; + in >> files_num >> "\n"; + while (files_num--) + { + TrackedCollectionItem item; + in >> item.file_path >> "\n"; + in >> item.timestamp >> "\n"; + in >> item.retries_count >> "\n"; + in >> item.last_exception >> "\n"; + files.push_back(item); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::write(WriteBuffer & out) const +{ + out << files.size() << "\n"; + for (const auto & processed_file : files) + { + out << processed_file.file_path << "\n"; + out << processed_file.timestamp << "\n"; + out << processed_file.retries_count << "\n"; + out << processed_file.last_exception << "\n"; + } +} + +String S3QueueFilesMetadata::S3QueueCollection::toString() const +{ + WriteBufferFromOwnString out; + write(out); + return out.str(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueCollection::getFileNames() +{ + S3FilesCollection keys = {}; + for (const auto & pair : files) + keys.insert(pair.file_path); + return keys; +} + + +S3QueueFilesMetadata::S3QueueProcessedCollection::S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_) + : max_size(max_size_), max_age(max_age_) +{ +} + +void S3QueueFilesMetadata::S3QueueProcessedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); + if (max_age > 0) // Remove old items + { + std::erase_if( + files, + [timestamp = getCurrentTime(), this](const TrackedCollectionItem & processed_file) + { return (timestamp - processed_file.timestamp) > max_age; }); + } +} + + +void S3QueueFilesMetadata::S3QueueProcessedCollection::add(const String & file_name) +{ + TrackedCollectionItem processed_file; + processed_file.file_path = file_name; + processed_file.timestamp = getCurrentTime(); + files.push_back(processed_file); + + /// TODO: it is strange that in parse() we take into account only max_age, but here only max_size. + while (files.size() > max_size) + { + files.pop_front(); + } +} + + +S3QueueFilesMetadata::S3QueueFailedCollection::S3QueueFailedCollection(const UInt64 & max_retries_count_) + : max_retries_count(max_retries_count_) +{ +} + +void S3QueueFilesMetadata::S3QueueFailedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); +} + + +bool S3QueueFilesMetadata::S3QueueFailedCollection::add(const String & file_name, const String & exception_message) +{ + auto failed_it = std::find_if( + files.begin(), files.end(), + [&file_name](const TrackedCollectionItem & s) { return s.file_path == file_name; }); + + if (failed_it == files.end()) + { + files.emplace_back(file_name, 0, max_retries_count, exception_message); + } + else if (failed_it->retries_count == 0 || --failed_it->retries_count == 0) + { + return false; + } + return true; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueFailedCollection::getFileNames() +{ + S3FilesCollection failed_keys; + for (const auto & pair : files) + { + if (pair.retries_count == 0) + failed_keys.insert(pair.file_path); + } + return failed_keys; +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::parse(const String & collection_str) +{ + ReadBufferFromString rb(collection_str); + Strings result; + readQuoted(result, rb); + files = S3FilesCollection(result.begin(), result.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::add(const Strings & file_names) +{ + files.insert(file_names.begin(), file_names.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::remove(const String & file_name) +{ + files.erase(file_name); +} + +String S3QueueFilesMetadata::S3QueueProcessingCollection::toString() const +{ + return DB::toString(Strings(files.begin(), files.end())); +} + + +S3QueueFilesMetadata::S3QueueFilesMetadata( + const StorageS3Queue * storage_, + const S3QueueSettings & settings_) + : storage(storage_) + , mode(settings_.mode) + , max_set_size(settings_.s3queue_tracked_files_limit.value) + , max_set_age_sec(settings_.s3queue_tracked_file_ttl_sec.value) + , max_loading_retries(settings_.s3queue_loading_retries.value) + , zookeeper_processing_path(fs::path(storage->getZooKeeperPath()) / "processing") + , zookeeper_processed_path(fs::path(storage->getZooKeeperPath()) / "processed") + , zookeeper_failed_path(fs::path(storage->getZooKeeperPath()) / "failed") + , zookeeper_lock_path(fs::path(storage->getZooKeeperPath()) / "lock") + , log(&Poco::Logger::get("S3QueueFilesMetadata")) +{ +} + +void S3QueueFilesMetadata::setFileProcessed(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + switch (mode) + { + case S3QueueMode::UNORDERED: + { + S3QueueProcessedCollection processed_files(max_set_size, max_set_age_sec); + processed_files.parse(zookeeper->get(zookeeper_processed_path)); + processed_files.add(file_path); + zookeeper->set(zookeeper_processed_path, processed_files.toString()); + break; + } + case S3QueueMode::ORDERED: + { + // Check that we set in ZooKeeper node only maximum processed file path. + // This check can be useful, when multiple table engines consume in ordered mode. + String max_file = getMaxProcessedFile(); + if (max_file.compare(file_path) <= 0) + zookeeper->set(zookeeper_processed_path, file_path); + break; + } + } + removeProcessingFile(file_path); +} + + +bool S3QueueFilesMetadata::setFileFailed(const String & file_path, const String & exception_message) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(zookeeper->get(zookeeper_failed_path)); + const bool can_be_retried = failed_collection.add(file_path, exception_message); + zookeeper->set(zookeeper_failed_path, failed_collection.toString()); + removeProcessingFile(file_path); + return can_be_retried; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getFailedFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String failed_files = zookeeper->get(zookeeper_failed_path); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(failed_files); + return failed_collection.getFileNames(); +} + +String S3QueueFilesMetadata::getMaxProcessedFile() +{ + auto zookeeper = storage->getZooKeeper(); + return zookeeper->get(zookeeper_processed_path); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessingFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + if (!zookeeper->tryGet(zookeeper_processing_path, processing_files)) + return {}; + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + return processing_collection.getFileNames(); +} + +void S3QueueFilesMetadata::setFilesProcessing(const Strings & file_paths) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + processing_collection.add(file_paths); + + if (zookeeper->exists(zookeeper_processing_path)) + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); + else + zookeeper->create(zookeeper_processing_path, processing_collection.toString(), zkutil::CreateMode::Ephemeral); +} + +void S3QueueFilesMetadata::removeProcessingFile(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + processing_collection.parse(processing_files); + processing_collection.remove(file_path); + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getUnorderedProcessedFiles() +{ + auto zookeeper = storage->getZooKeeper(); + S3QueueProcessedCollection processed_collection(max_set_size, max_set_age_sec); + processed_collection.parse(zookeeper->get(zookeeper_processed_path)); + return processed_collection.getFileNames(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessedFailedAndProcessingFiles() +{ + S3FilesCollection processed_and_failed_files = getFailedFiles(); + switch (mode) + { + case S3QueueMode::UNORDERED: + { + processed_and_failed_files.merge(getUnorderedProcessedFiles()); + break; + } + case S3QueueMode::ORDERED: + { + processed_and_failed_files.insert(getMaxProcessedFile()); + break; + } + } + processed_and_failed_files.merge(getProcessingFiles()); + return processed_and_failed_files; +} + +std::shared_ptr S3QueueFilesMetadata::acquireLock(zkutil::ZooKeeperPtr zookeeper) +{ + UInt32 retry_count = 200; + UInt32 sleep_ms = 100; + UInt32 retries = 0; + + while (true) + { + Coordination::Error code = zookeeper->tryCreate(zookeeper_lock_path, "", zkutil::CreateMode::Ephemeral); + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) + { + retries++; + if (retries > retry_count) + { + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Can't acquire zookeeper lock"); + } + sleepForMilliseconds(sleep_ms); + } + else if (code != Coordination::Error::ZOK) + { + throw Coordination::Exception::fromPath(code, zookeeper_lock_path); + } + else + { + return zkutil::EphemeralNodeHolder::existing(zookeeper_lock_path, *zookeeper); + } + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.h b/src/Storages/S3Queue/S3QueueFilesMetadata.h new file mode 100644 index 00000000000..577c71b2227 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.h @@ -0,0 +1,127 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include +# include + +namespace DB +{ +class StorageS3Queue; +struct S3QueueSettings; + +class S3QueueFilesMetadata +{ +public: + struct TrackedCollectionItem + { + TrackedCollectionItem() = default; + TrackedCollectionItem(const String & file_path_, UInt64 timestamp_, UInt64 retries_count_, const String & last_exception_) + : file_path(file_path_), timestamp(timestamp_), retries_count(retries_count_), last_exception(last_exception_) {} + String file_path; + UInt64 timestamp = 0; + UInt64 retries_count = 0; + String last_exception; + }; + + using S3FilesCollection = std::unordered_set; + using TrackedFiles = std::deque; + + S3QueueFilesMetadata(const StorageS3Queue * storage_, const S3QueueSettings & settings_); + + void setFilesProcessing(const Strings & file_paths); + void setFileProcessed(const String & file_path); + bool setFileFailed(const String & file_path, const String & exception_message); + + S3FilesCollection getProcessedFailedAndProcessingFiles(); + String getMaxProcessedFile(); + std::shared_ptr acquireLock(zkutil::ZooKeeperPtr zookeeper); + + struct S3QueueCollection + { + public: + virtual ~S3QueueCollection() = default; + virtual String toString() const; + S3FilesCollection getFileNames(); + + virtual void parse(const String & collection_str) = 0; + + protected: + TrackedFiles files; + + void read(ReadBuffer & in); + void write(WriteBuffer & out) const; + }; + + struct S3QueueProcessedCollection : public S3QueueCollection + { + public: + S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_); + + void parse(const String & collection_str) override; + void add(const String & file_name); + + private: + const UInt64 max_size; + const UInt64 max_age; + }; + + struct S3QueueFailedCollection : S3QueueCollection + { + public: + S3QueueFailedCollection(const UInt64 & max_retries_count_); + + void parse(const String & collection_str) override; + bool add(const String & file_name, const String & exception_message); + + S3FilesCollection getFileNames(); + + private: + UInt64 max_retries_count; + }; + + struct S3QueueProcessingCollection + { + public: + S3QueueProcessingCollection() = default; + + void parse(const String & collection_str); + void add(const Strings & file_names); + void remove(const String & file_name); + + String toString() const; + const S3FilesCollection & getFileNames() const { return files; } + + private: + S3FilesCollection files; + }; + +private: + const StorageS3Queue * storage; + const S3QueueMode mode; + const UInt64 max_set_size; + const UInt64 max_set_age_sec; + const UInt64 max_loading_retries; + + const String zookeeper_processing_path; + const String zookeeper_processed_path; + const String zookeeper_failed_path; + const String zookeeper_lock_path; + + mutable std::mutex mutex; + Poco::Logger * log; + + S3FilesCollection getFailedFiles(); + S3FilesCollection getProcessingFiles(); + S3FilesCollection getUnorderedProcessedFiles(); + + void removeProcessingFile(const String & file_path); +}; + + +} + + +#endif diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/S3Queue/S3QueueSettings.cpp new file mode 100644 index 00000000000..b74cf8d39bb --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + +void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h new file mode 100644 index 00000000000..75defc4a57f --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(S3QueueMode, \ + mode, \ + S3QueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ + "With ordered mode, only the max name of the successfully consumed file stored.", \ + 0) \ + M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ + M(String, keeper_path, "", "Zookeeper node path", 0) \ + M(UInt64, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ + M(UInt64, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt64, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt64, s3queue_polling_backoff_ms, 0, "Polling backoff", 0) \ + M(UInt64, s3queue_tracked_files_limit, 1000, "Max set size for tracking processed files in unordered mode in ZooKeeper", 0) \ + M(UInt64, \ + s3queue_tracked_file_ttl_sec, \ + 0, \ + "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", \ + 0) \ + M(UInt64, s3queue_polling_size, 50, "Maximum files to fetch from S3 with SELECT", 0) + +#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ + S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + FORMAT_FACTORY_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + + +struct S3QueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp new file mode 100644 index 00000000000..b435cd618de --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -0,0 +1,311 @@ +#include +#include +#include +#include "IO/ParallelReadBuffer.h" +#include "Parsers/ASTCreateQuery.h" +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include +# include + +# include + +# include +# include +# include + +# include + +# include + +# include +# include +# include + +# include +# include + + +namespace CurrentMetrics +{ +extern const Metric StorageS3Threads; +extern const Metric StorageS3ThreadsActive; +} + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int S3_ERROR; +} + + +StorageS3QueueSource::QueueGlobIterator::QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_) + : max_poll_size(max_poll_size_) + , glob_iterator(std::make_unique( + client_, globbed_uri_, query, virtual_header, context, nullptr, request_settings_)) +{ + /// todo(kssenii): remove this loop, it should not be here + while (true) + { + KeyWithInfo val = glob_iterator->next(); + if (val.key.empty()) + break; + keys_buf.push_back(val); + } +} + +Strings StorageS3QueueSource::QueueGlobIterator::filterProcessingFiles( + const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file) +{ + for (const KeyWithInfo & val : keys_buf) + { + auto full_path = val.key; + if (exclude_keys.find(full_path) != exclude_keys.end()) + { + LOG_TEST(log, "File {} will be skipped, because it was found in exclude files list " + "(either already processed or failed to be processed)", val.key); + continue; + } + + if ((engine_mode == S3QueueMode::ORDERED) && (full_path.compare(max_file) <= 0)) + continue; + + if ((processing_keys.size() < max_poll_size) || (engine_mode == S3QueueMode::ORDERED)) + { + processing_keys.push_back(val); + } + else + { + break; + } + } + + if (engine_mode == S3QueueMode::ORDERED) + { + std::sort( + processing_keys.begin(), + processing_keys.end(), + [](const KeyWithInfo & lhs, const KeyWithInfo & rhs) { return lhs.key.compare(rhs.key) < 0; }); + + if (processing_keys.size() > max_poll_size) + { + processing_keys.erase(processing_keys.begin() + max_poll_size, processing_keys.end()); + } + } + + Strings keys; + for (const auto & key_info : processing_keys) + keys.push_back(key_info.key); + + processing_keys.push_back(KeyWithInfo()); + processing_iterator = processing_keys.begin(); + return keys; +} + + +StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next() +{ + std::lock_guard lock(mutex); + if (processing_iterator != processing_keys.end()) + { + return *processing_iterator++; + } + + return KeyWithInfo(); +} + +StorageS3QueueSource::StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format_, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket_, + const String & version_id_, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + const size_t download_thread_num_) + : ISource(info.source_header) + , WithContext(context_) + , name(std::move(name_)) + , bucket(bucket_) + , version_id(version_id_) + , format(format_) + , columns_desc(info.columns_description) + , request_settings(request_settings_) + , client(client_) + , files_metadata(files_metadata_) + , requested_virtual_columns(info.requested_virtual_columns) + , requested_columns(info.requested_columns) + , file_iterator(file_iterator_) + , action(action_) +{ + internal_source = std::make_shared( + info, + format_, + name_, + context_, + format_settings_, + max_block_size_, + request_settings_, + compression_hint_, + client_, + bucket_, + version_id_, + file_iterator, + download_thread_num_, + /* query_info */ std::nullopt); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); +} + +StorageS3QueueSource::~StorageS3QueueSource() +{ + internal_source->create_reader_pool.wait(); +} + +String StorageS3QueueSource::getName() const +{ + return name; +} + +Chunk StorageS3QueueSource::generate() +{ + auto file_progress = getContext()->getFileProgressCallback(); + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + bool success_in_pulling = false; + try + { + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + auto file_path = reader.getPath(); + + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } + } + success_in_pulling = true; + } + } + catch (const Exception & e) + { + LOG_ERROR(log, "Exception in chunk pulling: {} ", e.displayText()); + files_metadata->setFileFailed(reader.getFile(), e.message()); + success_in_pulling = false; + } + if (success_in_pulling) + { + applyActionAfterProcessing(reader.getFile()); + files_metadata->setFileProcessed(reader.getFile()); + return chunk; + } + + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool.wait(); + reader_future = internal_source->createReaderAsync(); + } + + return {}; +} + + +void StorageS3QueueSource::applyActionAfterProcessing(const String & file_path) +{ + switch (action) + { + case S3QueueAction::DELETE: + deleteProcessedObject(file_path); + break; + case S3QueueAction::KEEP: + break; + } +} + +void StorageS3QueueSource::deleteProcessedObject(const String & file_path) +{ + LOG_INFO(log, "Delete processed file {} from bucket {}", file_path, bucket); + + S3::DeleteObjectRequest request; + request.WithKey(file_path).WithBucket(bucket); + auto outcome = client->DeleteObject(request); + if (!outcome.IsSuccess()) + { + const auto & err = outcome.GetError(); + LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); + } + else + { + LOG_TRACE(log, "Object with path {} was removed from S3", file_path); + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h new file mode 100644 index 00000000000..523b8c0e81f --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -0,0 +1,124 @@ +#pragma once +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + + +class StorageS3QueueSource : public ISource, WithContext +{ +public: + using IIterator = StorageS3Source::IIterator; + using DisclosedGlobIterator = StorageS3Source::DisclosedGlobIterator; + using KeysWithInfo = StorageS3Source::KeysWithInfo; + using KeyWithInfo = StorageS3Source::KeyWithInfo; + class QueueGlobIterator : public IIterator + { + public: + QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_ = {}); + + KeyWithInfo next() override; + + Strings + filterProcessingFiles(const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file = ""); + + private: + UInt64 max_poll_size; + KeysWithInfo keys_buf; + KeysWithInfo processing_keys; + mutable std::mutex mutex; + std::unique_ptr glob_iterator; + std::vector::iterator processing_iterator; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSourceIterator"); + }; + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket, + const String & version_id, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + size_t download_thread_num); + + ~StorageS3QueueSource() override; + + String getName() const override; + + Chunk generate() override; + + +private: + String name; + String bucket; + String version_id; + String format; + ColumnsDescription columns_desc; + S3Settings::RequestSettings request_settings; + std::shared_ptr client; + + std::shared_ptr files_metadata; + using ReaderHolder = StorageS3Source::ReaderHolder; + ReaderHolder reader; + + NamesAndTypesList requested_virtual_columns; + NamesAndTypesList requested_columns; + std::shared_ptr file_iterator; + const S3QueueAction action; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSource"); + + std::future reader_future; + + mutable std::mutex mutex; + + std::shared_ptr internal_source; + void deleteProcessedObject(const String & file_path); + void applyActionAfterProcessing(const String & file_path); +}; + +} +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp new file mode 100644 index 00000000000..23eebb6ded9 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -0,0 +1,115 @@ +#include + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int METADATA_MISMATCH; +} + +S3QueueTableMetadata::S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings) +{ + format_name = configuration.format; + after_processing = engine_settings.after_processing.toString(); + mode = engine_settings.mode.toString(); + s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit; + s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; +} + + +String S3QueueTableMetadata::toString() const +{ + Poco::JSON::Object json; + json.set("after_processing", after_processing); + json.set("mode", mode); + json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit); + json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec); + json.set("format_name", format_name); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +void S3QueueTableMetadata::read(const String & metadata_str) +{ + Poco::JSON::Parser parser; + auto json = parser.parse(metadata_str).extract(); + after_processing = json->getValue("after_processing"); + mode = json->getValue("mode"); + s3queue_tracked_files_limit = json->getValue("s3queue_tracked_files_limit"); + s3queue_tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); + format_name = json->getValue("format_name"); +} + +S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +{ + S3QueueTableMetadata metadata; + metadata.read(metadata_str); + return metadata; +} + + +void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +{ + if (after_processing != from_zk.after_processing) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs " + "in action after processing. Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.after_processing), + DB::toString(after_processing)); + + if (mode != from_zk.mode) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in engine mode. " + "Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.after_processing), + DB::toString(after_processing)); + + if (s3queue_tracked_files_limit != from_zk.s3queue_tracked_files_limit) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set size. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_files_limit, + s3queue_tracked_files_limit); + + if (s3queue_tracked_file_ttl_sec != from_zk.s3queue_tracked_file_ttl_sec) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set age. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_file_ttl_sec, + s3queue_tracked_file_ttl_sec); + + if (format_name != from_zk.format_name) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in format name. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.format_name, + format_name); +} + +void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +{ + checkImmutableFieldsEquals(from_zk); +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h new file mode 100644 index 00000000000..4b6fbc54825 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -0,0 +1,43 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include + +namespace DB +{ + +class WriteBuffer; +class ReadBuffer; + +/** The basic parameters of S3Queue table engine for saving in ZooKeeper. + * Lets you verify that they match local ones. + */ +struct S3QueueTableMetadata +{ + String format_name; + String after_processing; + String mode; + UInt64 s3queue_tracked_files_limit; + UInt64 s3queue_tracked_file_ttl_sec; + + S3QueueTableMetadata() = default; + S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings); + + void read(const String & metadata_str); + static S3QueueTableMetadata parse(const String & metadata_str); + + String toString() const; + + void checkEquals(const S3QueueTableMetadata & from_zk) const; + +private: + void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; +}; + + +} + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp new file mode 100644 index 00000000000..673c0dde1d2 --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -0,0 +1,640 @@ +#include "config.h" + + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include +# include +# include +# include "IO/ParallelReadBuffer.h" + +# include + +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +# include + +# include +# include +# include + +# include + +# include + +# include + +# include +# include +# include +# include + +namespace fs = std::filesystem; + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB +{ + +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int S3_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int QUERY_NOT_ALLOWED; + extern const int NO_ZOOKEEPER; + extern const int REPLICA_ALREADY_EXISTS; + extern const int INCOMPATIBLE_COLUMNS; +} + + +StorageS3Queue::StorageS3Queue( + std::unique_ptr s3queue_settings_, + const StorageS3::Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_) + : IStorage(table_id_) + , WithContext(context_) + , s3queue_settings(std::move(s3queue_settings_)) + , after_processing(s3queue_settings->after_processing) + , configuration{configuration_} + , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) + , format_settings(format_settings_) + , partition_by(partition_by_) + , log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")")) +{ + if (configuration.url.key.ends_with('/')) + configuration.url.key += '*'; + + if (!withGlobs()) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + + String setting_zk_path = s3queue_settings->keeper_path; + if (setting_zk_path.empty()) + { + auto database = DatabaseCatalog::instance().getDatabase(table_id_.database_name); + bool is_in_replicated_database = database->getEngineName() == "Replicated"; + + auto default_path = getContext()->getSettingsRef().s3queue_default_zookeeper_path.value; + String zk_path_prefix; + + if (!default_path.empty()) + { + zk_path_prefix = default_path; + } + else if (is_in_replicated_database) + { + LOG_INFO(log, "S3Queue engine zookeeper path is not specified. " + "Using replicated database zookeeper path"); + + zk_path_prefix = fs::path(assert_cast(database.get())->getZooKeeperPath()) / "s3queue"; + } + else + { + throw Exception(ErrorCodes::NO_ZOOKEEPER, + "S3Queue keeper_path engine setting not specified, " + "s3queue_default_zookeeper_path_prefix not specified"); + } + + zk_path = zkutil::extractZooKeeperPath( + fs::path(zk_path_prefix) / toString(table_id_.uuid), /* check_starts_with_slash */ true, log); + } + else + { + /// We do not add table uuid here on purpose. + zk_path = zkutil::extractZooKeeperPath(s3queue_settings->keeper_path.value, /* check_starts_with_slash */ true, log); + } + + LOG_INFO(log, "Using zookeeper path: {}", zk_path); + + FormatFactory::instance().checkFormatName(configuration.format); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); + StorageInMemoryMetadata storage_metadata; + configuration.update(context_); + + if (columns_.empty()) + { + auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); + + auto metadata_snapshot = getInMemoryMetadataPtr(); + const bool is_first_replica = createTableIfNotExists(metadata_snapshot); + + if (!is_first_replica) + { + checkTableStructure(zk_path, metadata_snapshot); + } + + files_metadata = std::make_shared(this, *s3queue_settings); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); + + auto poll_thread = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = std::make_shared(std::move(poll_thread)); +} + + +bool StorageS3Queue::supportsSubcolumns() const +{ + return true; +} + +bool StorageS3Queue::supportsSubsetOfColumns() const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); +} + +Pipe StorageS3Queue::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t /* num_streams */) +{ + if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) + throw Exception( + ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + + if (mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageS3Queue with attached materialized views"); + + auto query_configuration = updateConfigurationAndGetCopy(local_context); + + std::shared_ptr iterator_wrapper = createFileIterator(local_context, query_info.query); + + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + + const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; + + return Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + local_context, + format_settings, + max_block_size, + query_configuration.request_settings, + configuration.compression_method, + query_configuration.client, + query_configuration.url.bucket, + query_configuration.url.version_id, + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); +} + +SinkToStoragePtr StorageS3Queue::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Write is not supported by storage {}", getName()); +} + +void StorageS3Queue::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName()); +} + +NamesAndTypesList StorageS3Queue::getVirtuals() const +{ + return virtual_columns; +} + +bool StorageS3Queue::supportsPartitionBy() const +{ + return true; +} + +void StorageS3Queue::startup() +{ + if (task) + task->holder->activateAndSchedule(); +} + +void StorageS3Queue::shutdown() +{ + shutdown_called = true; + if (task) + { + task->stream_cancelled = true; + task->holder->deactivate(); + } +} + +size_t StorageS3Queue::getTableDependentCount() const +{ + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + return DatabaseCatalog::instance().getDependentViews(table_id).size(); +} + +bool StorageS3Queue::hasDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + LOG_TEST(log, "Number of attached views {} for {}", view_ids.size(), table_id.getNameForLogs()); + + if (view_ids.empty()) + return false; + + // Check the dependencies are ready? + for (const auto & view_id : view_ids) + { + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(view.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + } + + return true; +} + +void StorageS3Queue::threadFunc() +{ + bool reschedule = true; + try + { + auto table_id = getStorageID(); + + auto dependencies_count = getTableDependentCount(); + if (dependencies_count) + { + auto start_time = std::chrono::steady_clock::now(); + + mv_attached.store(true); + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!task->stream_cancelled) + { + if (!hasDependencies(table_id)) + { + /// For this case, we can not wait for watch thread to wake up + reschedule = true; + break; + } + + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + streamToViews(); + + auto ts = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(ts - start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule."); + reschedule = true; + break; + } + + reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + mv_attached.store(false); + + if (reschedule && !shutdown_called) + { + LOG_TRACE(log, "Reschedule S3 Queue thread func."); + /// Reschedule with backoff. + if (reschedule_processing_interval_ms < s3queue_settings->s3queue_polling_max_timeout_ms) + reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + task->holder->scheduleAfter(reschedule_processing_interval_ms); + } +} + + +void StorageS3Queue::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); + + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + size_t block_size = 100; + + auto s3queue_context = Context::createCopy(getContext()); + s3queue_context->makeQueryContext(); + auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); + + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto column_names = block_io.pipeline.getHeader().getNames(); + + // Create a stream for each consumer and join them in a union stream + + std::shared_ptr iterator_wrapper = createFileIterator(s3queue_context, nullptr); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + const size_t max_download_threads = s3queue_context->getSettingsRef().max_download_threads; + + auto pipe = Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + s3queue_context, + format_settings, + block_size, + query_configuration.request_settings, + configuration.compression_method, + query_configuration.client, + query_configuration.url.bucket, + query_configuration.url.version_id, + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); + + + std::atomic_size_t rows = 0; + { + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } +} + +StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) +{ + configuration.update(local_context); + return configuration; +} + +zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +{ + std::lock_guard lock{zk_mutex}; + if (!zk_client || zk_client->expired()) + { + zk_client = getContext()->getZooKeeper(); + zk_client->sync(zk_path); + } + return zk_client; +} + + +bool StorageS3Queue::createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + zookeeper->createAncestors(zk_path); + + for (size_t i = 0; i < zk_create_table_retries; ++i) + { + Coordination::Requests ops; + bool is_first_replica = true; + if (zookeeper->exists(zk_path + "/metadata")) + { + if (!zookeeper->exists(zk_path + "/processing")) + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + LOG_DEBUG(log, "This table {} is already created, will use existing metadata for checking engine settings", zk_path); + is_first_replica = false; + } + else + { + String metadata_str = S3QueueTableMetadata(configuration, *s3queue_settings).toString(); + ops.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/failed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + ops.emplace_back(zkutil::makeCreateRequest( + zk_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); + + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); + } + + Coordination::Responses responses; + auto code = zookeeper->tryMulti(ops, responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path); + continue; + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + + return is_first_replica; + } + + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zk_path or because of logical error"); +} + + +/** Verify that list of columns and table settings match those specified in ZK (/metadata). + * If not, throw an exception. + */ +void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + + S3QueueTableMetadata old_metadata(configuration, *s3queue_settings); + + Coordination::Stat metadata_stat; + String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata", &metadata_stat); + auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); + old_metadata.checkEquals(metadata_from_zk); + + Coordination::Stat columns_stat; + auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_prefix) / "columns", &columns_stat)); + + const ColumnsDescription & old_columns = metadata_snapshot->getColumns(); + if (columns_from_zk != old_columns) + { + throw Exception( + ErrorCodes::INCOMPATIBLE_COLUMNS, + "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n" + "{}\nZookeeper columns:\n{}", + old_columns.toString(), + columns_from_zk.toString()); + } +} + + +std::shared_ptr +StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query) +{ + auto it = std::make_shared( + *configuration.client, + configuration.url, + query, + virtual_block, + local_context, + s3queue_settings->s3queue_polling_size.value, + configuration.request_settings); + + auto zookeeper = getZooKeeper(); + auto lock = files_metadata->acquireLock(zookeeper); + S3QueueFilesMetadata::S3FilesCollection files_to_skip = files_metadata->getProcessedFailedAndProcessingFiles(); + + Strings files_to_process; + if (s3queue_settings->mode == S3QueueMode::UNORDERED) + { + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip); + } + else + { + String max_processed_file = files_metadata->getMaxProcessedFile(); + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip, max_processed_file); + } + + LOG_TEST(log, "Found files to process: {}", fmt::join(files_to_process, ", ")); + + files_metadata->setFilesProcessing(files_to_process); + return it; +} + +void StorageS3Queue::drop() +{ + auto zookeeper = getZooKeeper(); + if (zookeeper->exists(zk_path)) + zookeeper->removeRecursive(zk_path); +} + +void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) +{ + factory.registerStorage( + name, + [](const StorageFactory::Arguments & args) + { + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto s3queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + s3queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. + const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + else + LOG_TRACE(&Poco::Logger::get("StorageS3"), "Remove: {}", change.name); + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared( + std::move(s3queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + partition_by); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3Queue(StorageFactory & factory) +{ + return registerStorageS3QueueImpl("S3Queue", factory); +} + +} + + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h new file mode 100644 index 00000000000..9737d5fcefa --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -0,0 +1,146 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +namespace Aws::S3 +{ +class Client; +} + +namespace DB +{ + + +class StorageS3Queue : public IStorage, WithContext +{ +public: + using Configuration = typename StorageS3::Configuration; + + StorageS3Queue( + std::unique_ptr s3queue_settings_, + const Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_ = nullptr); + + String getName() const override { return "S3Queue"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + ContextPtr /*local_context*/, + TableExclusiveLockHolder &) override; + + NamesAndTypesList getVirtuals() const override; + + bool supportsPartitionBy() const override; + + const auto & getFormatName() const { return configuration.format; } + + const String & getZooKeeperPath() const { return zk_path; } + + zkutil::ZooKeeperPtr getZooKeeper() const; + +private: + const std::unique_ptr s3queue_settings; + const S3QueueAction after_processing; + + std::shared_ptr files_metadata; + Configuration configuration; + NamesAndTypesList virtual_columns; + Block virtual_block; + UInt64 reschedule_processing_interval_ms; + + std::optional format_settings; + ASTPtr partition_by; + + String zk_path; + mutable zkutil::ZooKeeperPtr zk_client; + mutable std::mutex zk_mutex; + + std::atomic mv_attached = false; + std::atomic shutdown_called{false}; + Poco::Logger * log; + + bool supportsSubcolumns() const override; + bool withGlobs() const { return configuration.url.key.find_first_of("*?{") != std::string::npos; } + + void threadFunc(); + size_t getTableDependentCount() const; + bool hasDependencies(const StorageID & table_id); + + void startup() override; + void shutdown() override; + void drop() override; + + struct TaskContext + { + BackgroundSchedulePool::TaskHolder holder; + std::atomic stream_cancelled{false}; + explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) { } + }; + std::shared_ptr task; + + bool supportsSubsetOfColumns() const override; + + const UInt32 zk_create_table_retries = 1000; + bool createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot); + void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); + + using KeysWithInfo = StorageS3QueueSource::KeysWithInfo; + + std::shared_ptr + createFileIterator(ContextPtr local_context, ASTPtr query); + + void streamToViews(); + Configuration updateConfigurationAndGetCopy(ContextPtr local_context); +}; + +} + +#endif diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 0f75562e0c1..8a3451e0662 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -262,6 +262,9 @@ struct SelectQueryInfo // If limit is not 0, that means it's a trivial limit query. UInt64 limit = 0; + /// For IStorageSystemOneBlock + std::vector columns_mask; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? projection->input_order_info : nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 365de2611ce..db94e76b568 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ const std::unordered_set optional_configuration_keys = { bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } @@ -257,7 +258,7 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. @@ -299,6 +300,7 @@ void registerStorageAzureBlob(StorageFactory & factory) args.constraints, args.comment, format_settings, + /* distributed_processing */ false, partition_by); }, { @@ -309,58 +311,113 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration) +static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) +{ + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + for (const auto & container : containers_list) + { + if (container_name == container.Name) + return true; + } + return false; +} + +AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) { AzureClientPtr result; if (configuration.is_connection_string) { + std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - result->CreateIfNotExists(); - } - else - { - if (configuration.account_name.has_value() && configuration.account_key.has_value()) + bool container_exists = containerExists(blob_service_client,configuration.container); + + if (!container_exists) { - auto storage_shared_key_credential = std::make_shared(*configuration.account_name, *configuration.account_key); - auto blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); + try { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? "" : "/") - + configuration.container; - - result = std::make_unique(final_url, storage_shared_key_credential); - } - else + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) { throw; } } } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (configuration.account_name.has_value() && configuration.account_key.has_value()) + { + storage_shared_key_credential + = std::make_shared(*configuration.account_name, *configuration.account_key); + } + + std::unique_ptr blob_service_client; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + } else { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); + blob_service_client = std::make_unique(configuration.connection_url); + } + + bool container_exists = containerExists(blob_service_client,configuration.container); + + std::string final_url; + size_t pos = configuration.connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = configuration.connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container + + configuration.connection_url.substr(pos); + } + else + final_url + = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; + + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); try { result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? "" : "/") - + configuration.container; - - result = std::make_unique(final_url, managed_identity_credential); + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); } else { @@ -392,12 +449,13 @@ StorageAzureBlob::StorageAzureBlob( const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) , name("AzureBlobStorage") , configuration(configuration_) , object_storage(std::move(object_storage_)) - , distributed_processing(false) + , distributed_processing(distributed_processing_) , format_settings(format_settings_) , partition_by(partition_by_) { @@ -407,7 +465,7 @@ StorageAzureBlob::StorageAzureBlob( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); + auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); storage_metadata.setColumns(columns); } else @@ -438,7 +496,7 @@ void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { throw Exception( ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", + "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); } @@ -603,7 +661,7 @@ private: Pipe StorageAzureBlob::read( const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, @@ -615,17 +673,13 @@ Pipe StorageAzureBlob::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper; - if (configuration.withGlobs()) + if (distributed_processing) + { + iterator_wrapper = std::make_shared(local_context, + local_context->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( @@ -639,44 +693,21 @@ Pipe StorageAzureBlob::read( query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, configuration.compression_method, object_storage.get(), configuration.container, - iterator_wrapper)); + iterator_wrapper, + query_info)); } return Pipe::unitePipes(std::move(pipes)); @@ -762,11 +793,6 @@ bool StorageAzureBlob::supportsPartitionBy() const return true; } -bool StorageAzureBlob::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageAzureBlob::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -827,6 +853,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata.back()); + is_finished = true; return; } @@ -845,8 +872,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() { std::lock_guard lock(next_mutex); - if (is_finished) + if (is_finished && index >= blobs_with_metadata.size()) + { return {}; + } bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); @@ -1056,40 +1085,33 @@ Chunk StorageAzureBlobSource::generate() return {}; } -Block StorageAzureBlobSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageAzureBlobSource::StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, - std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_), false) + std::shared_ptr file_iterator_, + const SelectQueryInfo & query_info_) + :ISource(info.source_header, false) , WithContext(context_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , format(format_) , name(std::move(name_)) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , columns_desc(columns_) + , columns_desc(info.columns_description) , max_block_size(max_block_size_) , compression_hint(compression_hint_) , object_storage(std::move(object_storage_)) , container(container_) , file_iterator(file_iterator_) + , query_info(query_info_) , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, 1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader")) { @@ -1123,6 +1145,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() format, *read_buf, sample_block, getContext(), max_block_size, format_settings, std::nullopt, std::nullopt, /* is_remote_fs */ true, compression_method); + input_format->setQueryInfo(query_info, getContext()); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -1134,6 +1157,13 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); @@ -1168,11 +1198,17 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + ContextPtr ctx, + bool distributed_processing) { RelativePathsWithMetadata read_keys; std::shared_ptr file_iterator; - if (configuration.withGlobs()) + if (distributed_processing) + { + file_iterator = std::make_shared(ctx, + ctx->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { file_iterator = std::make_shared( object_storage, configuration.container, configuration.blob_path, nullptr, Block{}, ctx, &read_keys); @@ -1203,7 +1239,7 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( return nullptr; } - /// S3 file iterator could get new keys after new iteration, check them in schema cache. + ///AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache. if (ctx->getSettingsRef().schema_inference_use_cache_for_azure && read_keys.size() > prev_read_keys_size) { columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index ad87da1f61a..b799f7c0aea 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include namespace DB { @@ -62,10 +64,11 @@ public: const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration); + static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); @@ -93,7 +96,7 @@ public: bool supportsPartitionBy() const override; - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; @@ -107,7 +110,8 @@ public: AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + ContextPtr ctx, + bool distributed_processing = false); private: std::string name; @@ -136,7 +140,6 @@ private: const String & format_name, const ContextPtr & ctx); - }; class StorageAzureBlobSource : public ISource, WithContext @@ -168,7 +171,7 @@ public: RelativePathWithMetadata next() override; ~GlobIterator() override = default; - private: + private: AzureObjectStorage * object_storage; std::string container; String blob_path_with_globs; @@ -193,6 +196,21 @@ public: std::function file_progress_callback; }; + class ReadIterator : public IIterator + { + public: + explicit ReadIterator(ContextPtr context_, + const ReadTaskCallback & callback_) + : IIterator(context_), callback(callback_) { } + RelativePathWithMetadata next() override + { + return {callback(), {}}; + } + + private: + ReadTaskCallback callback; + }; + class KeysIterator : public IIterator { public: @@ -222,18 +240,17 @@ public: }; StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, - std::shared_ptr file_iterator_); + std::shared_ptr file_iterator_, + const SelectQueryInfo & query_info_); ~StorageAzureBlobSource() override; @@ -241,10 +258,9 @@ public: String getName() const override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - private: - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; String format; String name; Block sample_block; @@ -255,6 +271,7 @@ private: AzureObjectStorage * object_storage; String container; std::shared_ptr file_iterator; + SelectQueryInfo query_info; struct ReaderHolder { diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp new file mode 100644 index 00000000000..1a3f48731a7 --- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -0,0 +1,99 @@ +#include "Storages/StorageAzureBlobCluster.h" + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageAzureBlobCluster::StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + , configuration{configuration_} + , object_storage(std::move(object_storage_)) +{ + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function + auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); +} + +void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); + + TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const +{ + auto iterator = std::make_shared( + object_storage.get(), configuration.container, configuration.blob_path, + query, virtual_block, context, nullptr); + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + +NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const +{ + return virtual_columns; +} + + +} + +#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h new file mode 100644 index 00000000000..6ef994fb873 --- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include "Client/Connection.h" +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageAzureBlobCluster : public IStorageCluster +{ +public: + StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_); + + std::string getName() const override { return "AzureBlobStorageCluster"; } + + NamesAndTypesList getVirtuals() const override; + + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + +private: + void updateBeforeRead(const ContextPtr & /*context*/) override {} + + void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + + StorageAzureBlob::Configuration configuration; + NamesAndTypesList virtual_columns; + Block virtual_block; + std::unique_ptr object_storage; +}; + + +} + +#endif diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9c05afd5284..eb154d0d943 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -996,8 +996,11 @@ void StorageBuffer::reschedule() std::unique_lock lock(buffer.tryLock()); if (lock.owns_lock()) { - min_first_write_time = buffer.first_write_time; - rows += buffer.data.rows(); + if (buffer.data) + { + min_first_write_time = std::min(min_first_write_time, buffer.first_write_time); + rows += buffer.data.rows(); + } } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a7aeb11e2d8..6f0072c4560 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -691,7 +691,11 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, if (remote_storage_id.hasDatabase()) resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); - auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_snapshot->metadata->getColumns(), distributed_storage_snapshot->object_columns); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + + auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options); + + auto storage = std::make_shared(resolved_remote_storage_id, ColumnsDescription{column_names_and_types}); auto table_node = std::make_shared(std::move(storage), query_context); if (table_expression_modifiers) @@ -1434,6 +1438,12 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) void StorageDistributed::flushAndPrepareForShutdown() { + if (!getDistributedSettingsRef().flush_on_detach) + { + LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); + return; + } + try { flushClusterNodesAllData(getContext()); diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 9fabf1a9fb6..df03301b5e8 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -92,6 +92,8 @@ StorageExecutable::StorageExecutable( .command_termination_timeout_seconds = settings.command_termination_timeout, .command_read_timeout_milliseconds = settings.command_read_timeout, .command_write_timeout_milliseconds = settings.command_write_timeout, + .stderr_reaction = settings.stderr_reaction, + .check_exit_code = settings.check_exit_code, .pool_size = settings.pool_size, .max_command_execution_time_seconds = settings.max_command_execution_time, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7e5e9d2b38c..6ec40b86c05 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -28,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +59,6 @@ #include #include - namespace ProfileEvents { extern const Event CreatedReadBufferOrdinary; @@ -377,10 +380,33 @@ std::unique_ptr createReadBuffer( bool use_table_fd, int table_fd, const String & compression_method, - ContextPtr context) + ContextPtr context, + const String & path_to_archive = "") { CompressionMethod method; + if (!path_to_archive.empty()) + { + auto reader = createArchiveReader(path_to_archive); + + if (current_path.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(current_path)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", current_path, matcher->error()); + + return reader->readFile([my_matcher = std::move(matcher)](const std::string & path) + { + return re2::RE2::FullMatch(path, *my_matcher); + }, /*throw_on_not_found=*/true); + } + else + { + return reader->readFile(current_path, /*throw_on_not_found=*/true); + } + } + if (use_table_fd) method = chooseCompressionMethod("", compression_method); else @@ -469,7 +495,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context) + ContextPtr context, + const std::vector & paths_to_archive) { if (format == "Distributed") { @@ -489,30 +516,62 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (context->getSettingsRef().schema_inference_use_cache_for_file) columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); - ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr + ReadBufferIterator read_buffer_iterator; + if (paths_to_archive.empty()) { - String path; - struct stat file_stat; - do + read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr { - if (it == paths.end()) + String path; + struct stat file_stat; + do { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return nullptr; + if (it == paths.end()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + return nullptr; + } + + path = *it++; + file_stat = getFileStat(path, false, -1, "File"); } + while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); - path = *it++; - file_stat = getFileStat(path, false, -1, "File"); - } - while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); + first = false; + return createReadBuffer(path, file_stat, false, -1, compression_method, context); + }; + } + else + { + read_buffer_iterator = [&, path_it = paths.begin(), archive_it = paths_to_archive.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr + { + String path; + struct stat file_stat; + do + { + if (archive_it == paths_to_archive.end()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + return nullptr; + } - first = false; - return createReadBuffer(path, file_stat, false, -1, compression_method, context); - }; + path = *archive_it++; + file_stat = getFileStat(path, false, -1, "File"); + } + while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); + + first = false; + return createReadBuffer(*path_it, file_stat, false, -1, compression_method, context, path); + + }; + } ColumnsDescription columns; if (columns_from_cache) @@ -564,8 +623,17 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args) : StorageFile(args) { + if (!args.path_to_archive.empty()) + { + paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read); + paths = {table_path_}; + } + else + { + paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); + } + is_db_table = false; - paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); is_path_with_globs = paths.size() > 1; if (!paths.empty()) path_for_partitioned_write = paths.front(); @@ -619,7 +687,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) columns = getTableStructureFromFileDescriptor(args.getContext()); else { - columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext()); + columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive); if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -649,57 +717,81 @@ using StorageFilePtr = std::shared_ptr; class StorageFileSource : public ISource { public: - struct FilesInfo + class FilesIterator { + public: + explicit FilesIterator( + const Strings & files_, std::vector archives_, const IArchiveReader::NameFilter & name_filter_) + : files(files_), archives(std::move(archives_)), name_filter(name_filter_) + { + } + + String next() + { + const auto & fs = fromArchive() ? archives : files; + + auto current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= fs.size()) + return ""; + + return fs[current_index]; + } + + bool fromArchive() const + { + return !archives.empty(); + } + + bool readSingleFileFromArchive() const + { + return !name_filter; + } + + bool passesFilter(const std::string & name) const + { + std::lock_guard lock(filter_mutex); + return name_filter(name); + } + + const String & getFileName() + { + if (files.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but got {}", files.size()); + + return files[0]; + } + private: std::vector files; - std::atomic next_file_to_read = 0; + std::vector archives; + mutable std::mutex filter_mutex; + IArchiveReader::NameFilter name_filter; - bool need_path_column = false; - bool need_file_column = false; - - size_t total_bytes_to_read = 0; + std::atomic index = 0; }; - using FilesInfoPtr = std::shared_ptr; - - static Block getBlockForSource(const Block & block_for_format, const FilesInfoPtr & files_info) - { - auto res = block_for_format; - if (files_info->need_path_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - } - if (files_info->need_file_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - } - return res; - } + using FilesIteratorPtr = std::shared_ptr; StorageFileSource( + const ReadFromFormatInfo & info, std::shared_ptr storage_, const StorageSnapshotPtr & storage_snapshot_, ContextPtr context_, + const SelectQueryInfo & query_info_, UInt64 max_block_size_, - FilesInfoPtr files_info_, - ColumnsDescription columns_description_, - const Block & block_for_format_, + FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_), false) + : ISource(info.source_header, false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) - , files_info(std::move(files_info_)) + , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) - , columns_description(std::move(columns_description_)) - , block_for_format(block_for_format_) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) , context(context_) + , query_info(query_info_) , max_block_size(max_block_size_) { if (!storage->use_table_fd) @@ -784,11 +876,72 @@ public: { if (!storage->use_table_fd) { - auto current_file = files_info->next_file_to_read.fetch_add(1); - if (current_file >= files_info->files.size()) - return {}; + if (files_iterator->fromArchive()) + { + if (files_iterator->readSingleFileFromArchive()) + { + auto archive = files_iterator->next(); + if (archive.empty()) + return {}; - current_path = files_info->files[current_file]; + struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + continue; + + archive_reader = createArchiveReader(archive); + current_path = files_iterator->getFileName(); + read_buf = archive_reader->readFile(current_path, /*throw_on_not_found=*/false); + if (!read_buf) + continue; + } + else + { + while (true) + { + if (file_enumerator == nullptr) + { + auto archive = files_iterator->next(); + if (archive.empty()) + return {}; + + struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + continue; + + archive_reader = createArchiveReader(archive); + file_enumerator = archive_reader->firstFile(); + continue; + } + + bool file_found = true; + while (!files_iterator->passesFilter(file_enumerator->getFileName())) + { + if (!file_enumerator->nextFile()) + { + file_found = false; + break; + } + } + + if (file_found) + { + current_path = file_enumerator->getFileName(); + break; + } + + file_enumerator = nullptr; + } + + chassert(file_enumerator); + read_buf = archive_reader->readFile(std::move(file_enumerator)); + } + } + else + { + current_path = files_iterator->next(); + if (current_path.empty()) + return {}; + } /// Special case for distributed format. Defaults are not needed here. if (storage->format_name == "Distributed") @@ -801,9 +954,12 @@ public: if (!read_buf) { - auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); + struct stat file_stat; + file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; + read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); } @@ -811,6 +967,7 @@ public: chassert(!storage->paths.empty()); const auto max_parsing_threads = std::max(settings.max_threads/ storage->paths.size(), 1UL); input_format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings, max_parsing_threads); + input_format->setQueryInfo(query_info, context); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -823,6 +980,13 @@ public: }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -838,19 +1002,19 @@ public: progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. - if (files_info->need_path_column) - { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - if (files_info->need_file_column) + for (const auto & virtual_column : requested_virtual_columns) { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - chunk.addColumn(column->convertToFullColumnIfConst()); + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, current_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_name)->convertToFullColumnIfConst()); + } } return chunk; @@ -864,6 +1028,10 @@ public: reader.reset(); pipeline.reset(); input_format.reset(); + + if (files_iterator->fromArchive() && !files_iterator->readSingleFileFromArchive()) + file_enumerator = archive_reader->nextFile(std::move(read_buf)); + read_buf.reset(); } @@ -874,7 +1042,7 @@ public: private: std::shared_ptr storage; StorageSnapshotPtr storage_snapshot; - FilesInfoPtr files_info; + FilesIteratorPtr files_iterator; String current_path; Block sample_block; std::unique_ptr read_buf; @@ -882,10 +1050,16 @@ private: std::unique_ptr pipeline; std::unique_ptr reader; + std::shared_ptr archive_reader; + std::unique_ptr file_enumerator = nullptr; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; Block block_for_format; ContextPtr context; /// TODO Untangle potential issues with context lifetime. + SelectQueryInfo query_info; UInt64 max_block_size; bool finished_generate = false; @@ -897,7 +1071,7 @@ private: Pipe StorageFile::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -909,32 +1083,46 @@ Pipe StorageFile::read( } else { - if (paths.size() == 1 && !fs::exists(paths[0])) + const auto & p = paths_to_archive.empty() ? paths : paths_to_archive; + if (p.size() == 1 && !fs::exists(p[0])) { if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]); } } - auto files_info = std::make_shared(); - files_info->files = paths; - files_info->total_bytes_to_read = total_bytes_to_read; - - for (const auto & column : column_names) + IArchiveReader::NameFilter filter; + if (!paths_to_archive.empty()) { - if (column == "_path") - files_info->need_path_column = true; - if (column == "_file") - files_info->need_file_column = true; + if (paths.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive"); + + const auto & path = paths[0]; + + if (path.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(path)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", path, matcher->error()); + + filter = [matcher](const std::string & p) + { + return re2::RE2::FullMatch(p, *matcher); + }; + } } + auto files_iterator = std::make_shared(paths, paths_to_archive, std::move(filter)); auto this_ptr = std::static_pointer_cast(shared_from_this()); size_t num_streams = max_num_streams; - if (max_num_streams > paths.size()) - num_streams = paths.size(); + + auto files_to_read = std::max(paths_to_archive.size(), paths.size()); + if (max_num_streams > files_to_read) + num_streams = files_to_read; Pipes pipes; pipes.reserve(num_streams); @@ -945,33 +1133,10 @@ Pipe StorageFile::read( if (progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + for (size_t i = 0; i < num_streams; ++i) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - } - - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - /// In case of reading from fd we have to check whether we have already created /// the read buffer from it in Storage constructor (for schema inference) or not. /// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed @@ -981,13 +1146,13 @@ Pipe StorageFile::read( read_buffer = std::move(peekable_read_buffer_from_fd); pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, storage_snapshot, context, + query_info, max_block_size, - files_info, - columns_description, - block_for_format, + files_iterator, std::move(read_buffer))); } @@ -1238,6 +1403,9 @@ SinkToStoragePtr StorageFile::write( ContextPtr context, bool /*async_insert*/) { + if (!use_table_fd && !paths_to_archive.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported"); + if (format_name == "Distributed") throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format"); @@ -1411,6 +1579,7 @@ void registerStorageFile(StorageFactory & factory) factory_args.constraints, factory_args.comment, {}, + {}, }; ASTs & engine_args_ast = factory_args.engine_args; @@ -1481,7 +1650,7 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::String) - source_path = literal->value.get(); + StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } @@ -1553,4 +1722,32 @@ void StorageFile::addColumnsToCache( schema_cache.addMany(cache_keys, columns); } +void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive) +{ + size_t pos = source.find("::"); + if (pos == String::npos) + { + filename = std::move(source); + return; + } + + std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + while (path_to_archive_view.back() == ' ') + path_to_archive_view.remove_suffix(1); + + if (path_to_archive_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + + path_to_archive = path_to_archive_view; + + std::string_view filename_view = std::string_view{source}.substr(pos + 2); + while (filename_view.front() == ' ') + filename_view.remove_prefix(1); + + if (filename_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + + filename = filename_view; +} + } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed50ae73e51..b5c23d87bba 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -22,8 +22,8 @@ public: const ColumnsDescription & columns; const ConstraintsDescription & constraints; const String & comment; - const std::string rename_after_processing; + std::string path_to_archive; }; /// From file descriptor @@ -75,6 +75,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; @@ -88,10 +90,13 @@ public: const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context); + ContextPtr context, + const std::vector & paths_to_archive = {"auto"}); static SchemaCache & getSchemaCache(const ContextPtr & context); + static void parseFileSource(String source, String & filename, String & path_to_archive); + protected: friend class StorageFileSource; friend class StorageFileSink; @@ -121,6 +126,7 @@ private: std::string base_path; std::vector paths; + std::vector paths_to_archive; bool is_db_table = true; /// Table is stored in real database, not user's file bool use_table_fd = false; /// Use table_fd instead of path diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 293beca9c24..8d9b8f5d8d0 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -360,7 +360,7 @@ ColumnPtr fillColumnWithRandomData( auto column = ColumnUUID::create(); column->getData().resize(limit); /// NOTE This is slightly incorrect as random UUIDs should have fixed version 4. - fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit, sizeof(UUID), rng); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit, sizeof(UUID), rng, true); return column; } case TypeIndex::Int8: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index afe75349864..af285a953dc 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -239,7 +239,7 @@ bool StorageInMemoryMetadata::hasAnyGroupByTTL() const ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const + const HasDependencyCallback & has_dependency) const { if (updated_columns.empty()) return {}; @@ -268,13 +268,13 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & index : getSecondaryIndices()) { - if (has_indice_or_projection("skp_idx_" + index.name + ".idx") || has_indice_or_projection("skp_idx_" + index.name + ".idx2")) + if (has_dependency(index.name, ColumnDependency::SKIP_INDEX)) add_dependent_columns(index.expression, indices_columns); } for (const auto & projection : getProjections()) { - if (has_indice_or_projection(projection.getDirectoryName())) + if (has_dependency(projection.name, ColumnDependency::PROJECTION)) add_dependent_columns(&projection, projections_columns); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4ed7eb8bf29..30b2b303492 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -147,12 +147,14 @@ struct StorageInMemoryMetadata TTLDescriptions getGroupByTTLs() const; bool hasAnyGroupByTTL() const; + using HasDependencyCallback = std::function; + /// Returns columns, which will be needed to calculate dependencies (skip indices, projections, /// TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const; + const HasDependencyCallback & has_dependency) const; /// Block with ordinary + materialized columns. Block getSampleBlock() const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 640706aae17..121d859a3f2 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) } } -HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const +HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const { auto metadata_snapshot = getInMemoryMetadataPtr(); if (!analyzed_join->sameStrictnessAndKind(strictness, kind)) @@ -237,8 +238,10 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, /// Qualifies will be added by join implementation (TableJoin contains a rename mapping). analyzed_join->setRightKeys(key_names); analyzed_join->setLeftKeys(left_key_names_resorted); - - HashJoinPtr join_clone = std::make_shared(analyzed_join, getRightSampleBlock()); + Block right_sample_block; + for (const auto & name : required_columns_names) + right_sample_block.insert(getRightSampleBlock().getByName(name)); + HashJoinPtr join_clone = std::make_shared(analyzed_join, right_sample_block); RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); join_clone->setLock(holder); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 5559b5d1ec8..4626d744a38 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -49,7 +49,7 @@ public: /// Return instance of HashJoin holding lock that protects from insertions to StorageJoin. /// HashJoin relies on structure of hash table that's why we need to return it with locked mutex. - HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const; + HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const; /// Get result type for function "joinGet(OrNull)" DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f2b1b907832..f98728c012e 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -419,7 +419,7 @@ StorageKeeperMap::StorageKeeperMap( } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, dropped_lock_path); + throw Coordination::Exception::fromPath(code, dropped_lock_path); } else { @@ -918,7 +918,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca { auto code = client->tryRemove(delete_request->getPath()); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException(code, delete_request->getPath()); + throw zkutil::KeeperException::fromPath(code, delete_request->getPath()); } } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index d8065b8bb3c..90cf55e53b2 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -777,7 +777,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr num_marks_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -946,9 +946,10 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// *.bin @@ -980,7 +981,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index e6fcbc203e6..2cd589bfd75 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -43,6 +43,11 @@ public: bool supportsTrivialCountOptimization() const override { return false; } + IndexSizeByName getSecondaryIndexSizes() const override + { + return nested_storage->getSecondaryIndexSizes(); + } + private: [[noreturn]] static void throwNotAllowed() { diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 16d724d54d8..7354dd56552 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -228,10 +228,13 @@ void StorageMaterializedView::dropInnerTableIfAny(bool sync, ContextPtr local_co { /// We will use `sync` argument wneh this function is called from a DROP query /// and will ignore database_atomic_wait_for_drop_and_detach_synchronously when it's called from drop task. - /// See the comment in StorageMaterializedView::drop + /// See the comment in StorageMaterializedView::drop. + /// DDL queries with StorageMaterializedView are fundamentally broken. + /// Best-effort to make them work: the inner table name is almost always less than the MV name (so it's safe to lock DDLGuard) + bool may_lock_ddl_guard = getStorageID().getQualifiedName() < target_table_id.getQualifiedName(); if (has_inner_table && tryGetTargetTable()) InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, - sync, /* ignore_sync_setting */ true); + sync, /* ignore_sync_setting */ true, may_lock_ddl_guard); } void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 6ed0583bd44..2ef1d8d3183 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -277,11 +277,13 @@ namespace const std::shared_ptr blocks_, const String & data_path_in_backup, const DiskPtr & temp_disk_, + const ReadSettings & read_settings_, UInt64 max_compress_block_size_) : context(context_) , metadata_snapshot(metadata_snapshot_) , blocks(blocks_) , temp_disk(temp_disk_) + , read_settings(read_settings_) , max_compress_block_size(max_compress_block_size_) { fs::path data_path_in_backup_fs = data_path_in_backup; @@ -314,7 +316,7 @@ namespace backup_entries.resize(file_paths.size()); temp_dir_owner.emplace(temp_disk); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_disk->createDirectories(temp_dir); /// Writing data.bin @@ -371,7 +373,7 @@ namespace file_checker.update(temp_dir / fs::path{file_paths[i]}.filename()); } file_checker.save(); - backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path)}; + backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path, read_settings)}; } /// We don't need to keep `blocks` any longer. @@ -386,6 +388,7 @@ namespace std::shared_ptr blocks; DiskPtr temp_disk; std::optional temp_dir_owner; + ReadSettings read_settings; UInt64 max_compress_block_size; Strings file_paths; size_t data_bin_pos, index_mrk_pos, columns_txt_pos, count_txt_pos, sizes_json_pos; @@ -395,13 +398,16 @@ namespace void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { auto temp_disk = backup_entries_collector.getContext()->getGlobalTemporaryVolume()->getDisk(0); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; + backup_entries_collector.addBackupEntries(std::make_shared( backup_entries_collector.getContext(), getInMemoryMetadataPtr(), data.get(), data_path_in_backup, temp_disk, + read_settings, max_compress_block_size)->getBackupEntries()); } @@ -453,10 +459,10 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat if (!dynamic_cast(in.get())) { temp_data_file.emplace(temporary_disk); - auto out = std::make_unique(temp_data_file->getPath()); + auto out = std::make_unique(temp_data_file->getAbsolutePath()); copyData(*in, *out); out.reset(); - in = createReadBufferFromFileBase(temp_data_file->getPath(), {}); + in = createReadBufferFromFileBase(temp_data_file->getAbsolutePath(), {}); } std::unique_ptr in_from_file{static_cast(in.release())}; CompressedReadBufferFromFile compressed_in{std::move(in_from_file)}; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 32e100edc4d..9d7f6903b46 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -220,7 +220,8 @@ void StorageMergeTree::read( local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; + auto cluster = local_context->getCluster(cluster_for_parallel_replicas); Block header; @@ -919,7 +920,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - disable_reason = "Some parts have differ mmutatuon version"; + disable_reason = "Some parts have different mutation version"; return false; } @@ -1154,16 +1155,25 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( } TransactionID first_mutation_tid = mutations_begin_it->second.tid; - MergeTreeTransactionPtr txn = tryGetTransactionForMutation(mutations_begin_it->second, log); - assert(txn || first_mutation_tid.isPrehistoric()); + MergeTreeTransactionPtr txn; - if (txn) + if (!first_mutation_tid.isPrehistoric()) { + /// Mutate visible parts only /// NOTE Do not mutate visible parts in Outdated state, because it does not make sense: /// mutation will fail anyway due to serialization error. - if (!part->version.isVisible(*txn)) + + /// It's possible that both mutation and transaction are already finished, + /// because that part should not be mutated because it was not visible for that transaction. + if (!part->version.isVisible(first_mutation_tid.start_csn, first_mutation_tid)) continue; + + txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + if (!txn) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find transaction {} that has started mutation {} " + "that is going to be applied to part {}", + first_mutation_tid, mutations_begin_it->second.file_name, part->name); } auto commands = std::make_shared(); @@ -1370,8 +1380,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldWriteAheadLogs(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); - if (getSettings()->merge_tree_enable_clear_old_broken_detached) - cleared_count += clearOldBrokenPartsFromDetachedDirectory(); + cleared_count += clearOldBrokenPartsFromDetachedDirectory(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); @@ -1644,11 +1653,7 @@ struct FutureNewEmptyPart MergeTreePartition partition; std::string part_name; - scope_guard tmp_dir_guard; - StorageMergeTree::MutableDataPartPtr data_part; - - std::string getDirName() const { return StorageMergeTree::EMPTY_PART_TMP_PREFIX + part_name; } }; using FutureNewEmptyParts = std::vector; @@ -1679,19 +1684,19 @@ FutureNewEmptyParts initCoverageWithNewEmptyParts(const DataPartsVector & old_pa return future_parts; } -StorageMergeTree::MutableDataPartsVector createEmptyDataParts(MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) +std::pair> createEmptyDataParts( + MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) { - StorageMergeTree::MutableDataPartsVector data_parts; + std::pair> data_parts; for (auto & part: future_parts) - data_parts.push_back(data.createEmptyPart(part.part_info, part.partition, part.part_name, txn)); + { + auto [new_data_part, tmp_dir_holder] = data.createEmptyPart(part.part_info, part.partition, part.part_name, txn); + data_parts.first.emplace_back(std::move(new_data_part)); + data_parts.second.emplace_back(std::move(tmp_dir_holder)); + } return data_parts; } -void captureTmpDirectoryHolders(MergeTreeData & data, FutureNewEmptyParts & future_parts) -{ - for (auto & part : future_parts) - part.tmp_dir_guard = data.getTemporaryPartDirectoryHolder(part.getDirName()); -} void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction) { @@ -1729,15 +1734,24 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. - auto merge_blocker = stopMergesAndWait(); waitForOutdatedPartsToBeLoaded(); + auto merge_blocker = stopMergesAndWait(); Stopwatch watch; ProfileEventsScope profile_events_scope; auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + auto data_parts_lock = lockParts(); + auto parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto parts = getVisibleDataPartsVector(query_context); @@ -1749,9 +1763,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1781,8 +1793,15 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + if (auto part = outdatePart(txn.get(), part_name, /*force=*/ true)) + dropPartsImpl({part}, detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); @@ -1792,8 +1811,10 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt if (detach) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } { @@ -1803,9 +1824,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1839,8 +1858,26 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + DataPartsVector parts_to_remove; + { + auto data_parts_lock = lockParts(); + if (partition_ast && partition_ast->all) + parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + else + { + String partition_id = getPartitionIDFromQuery(partition, query_context, &data_parts_lock); + parts_to_remove = getVisibleDataPartsVectorInPartition(query_context, partition_id, data_parts_lock); + } + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + } + dropPartsImpl(std::move(parts_to_remove), detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); DataPartsVector parts; @@ -1855,12 +1892,16 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont } if (detach) + { for (const auto & part : parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } + } auto future_parts = initCoverageWithNewEmptyParts(parts); @@ -1869,9 +1910,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1889,6 +1929,35 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont clearEmptyParts(); } +void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool detach) +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + + if (detach) + { + /// If DETACH clone parts to detached/ directory + /// NOTE: no race with background cleanup until we hold pointers to parts + for (const auto & part : parts_to_remove) + { + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); + } + } + + if (deduplication_log) + { + for (const auto & part : parts_to_remove) + deduplication_log->dropPart(part->info); + } + + if (detach) + LOG_INFO(log, "Detached {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + else + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); +} + PartitionCommandsResultInfo StorageMergeTree::attachPartition( const ASTPtr & partition, const StorageMetadataPtr & /* metadata_snapshot */, bool attach_part, ContextPtr local_context) @@ -1929,7 +1998,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( } /// New parts with other data may appear in place of deleted parts. - local_context->dropCaches(); + local_context->clearCaches(); return results; } @@ -2181,6 +2250,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) { const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -2193,7 +2263,7 @@ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collec for (const auto & data_part : data_parts) min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1); - auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, read_settings, local_context); for (auto & part_backup_entries : parts_backup_entries) backup_entries_collector.addBackupEntries(std::move(part_backup_entries.backup_entries)); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 936ba1b7f18..c77e5140d75 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -237,6 +237,7 @@ private: void dropPartNoWaitNoThrow(const String & part_name) override; void dropPart(const String & part_name, bool detach, ContextPtr context) override; void dropPartition(const ASTPtr & partition, bool detach, ContextPtr context) override; + void dropPartsImpl(DataPartsVector && parts_to_remove, bool detach); PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, ContextPtr context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index b0a220eb1d2..76a439eabaf 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -104,6 +104,7 @@ Pipe StorageMySQL::read( column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::BackticksMySQL, + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, context_); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 3551ee36819..7961c44e844 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -122,7 +122,7 @@ Pipe StoragePostgreSQL::read( query_info_, column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), - IdentifierQuotingStyle::DoubleQuotes, remote_table_schema, remote_table_name, context_); + IdentifierQuotingStyle::DoubleQuotes, LiteralEscapingStyle::PostgreSQL, remote_table_schema, remote_table_name, context_); LOG_TRACE(log, "Query: {}", query); Block sample_block; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2c2cea0af2b..76a2ad9883c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -197,6 +197,7 @@ namespace ActionLocks extern const StorageActionBlockType ReplicationQueue; extern const StorageActionBlockType PartsTTLMerge; extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } @@ -337,6 +338,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( mutations_updating_task->deactivate(); + mutations_watch_callback = std::make_shared(mutations_updating_task->getWatchCallback()); + merge_selecting_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); @@ -712,7 +715,7 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() { auto res = future.get(); if (res.error != Coordination::Error::ZOK && res.error != Coordination::Error::ZNODEEXISTS) - throw Coordination::Exception(fmt::format("Failed to create new nodes at {}", zookeeper_path), res.error); + throw Coordination::Exception(res.error, "Failed to create new nodes at {}", zookeeper_path); } } @@ -748,7 +751,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, drop_lock_path); + throw Coordination::Exception::fromPath(code, drop_lock_path); } else { @@ -2096,8 +2099,10 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { if (auto part_to_detach = part.getPartIfItWasActive()) { - LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); - part_to_detach->makeCloneInDetached("", metadata_snapshot); + String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } } @@ -2827,7 +2832,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : parts_to_remove_from_working_set) { LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("clone", metadata_snapshot); + part->makeCloneInDetached("clone", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -3214,7 +3219,7 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask() { try { - queue.updateMutations(getZooKeeper(), mutations_updating_task->getWatchCallback()); + queue.updateMutations(getZooKeeper(), mutations_watch_callback); } catch (const Coordination::Exception & e) { @@ -3793,12 +3798,12 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n chassert(!broken_part); chassert(!storage_init); part->was_removed_as_broken = true; - part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); broken_part = part; } else { - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); } detached_parts.push_back(part->name); } @@ -4306,7 +4311,7 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } else { @@ -4330,7 +4335,7 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } } } @@ -4340,7 +4345,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); - LOG_DEBUG(log, "Cleaning up last parent node for partition {}", partition_id); + LOG_DEBUG(log, "Cleaning up last part node for partition {}", partition_id); /// The name of the previous part for which the quorum was reached. const String quorum_last_part_path = fs::path(zookeeper_path) / "quorum" / "last_part"; @@ -4361,6 +4366,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) if (!parts_with_quorum.added_parts.contains(partition_id)) { /// There is no information about interested part. + LOG_TEST(log, "There is no information about the partition"); break; } @@ -4378,6 +4384,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) else if (code == Coordination::Error::ZNONODE) { /// Node is deleted. It is impossible, but it is Ok. + LOG_WARNING(log, "The last part node {} was deleted", quorum_last_part_path); break; } else if (code == Coordination::Error::ZBADVERSION) @@ -4386,7 +4393,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) continue; } else - throw Coordination::Exception(code, quorum_last_part_path); + throw Coordination::Exception::fromPath(code, quorum_last_part_path); } } @@ -5150,7 +5157,9 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( { auto table_id = getStorageID(); - auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + auto scalars = local_context->hasQueryContext() ? local_context->getQueryContext()->getScalars() : Scalars{}; + String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; + auto parallel_replicas_cluster = local_context->getCluster(cluster_for_parallel_replicas); ASTPtr modified_query_ast; Block header; @@ -5885,7 +5894,7 @@ void StorageReplicatedMergeTree::alter( } else { - throw Coordination::Exception("Alter cannot be assigned because of Zookeeper error", rc); + throw Coordination::Exception::fromMessage(rc, "Alter cannot be assigned because of Zookeeper error"); } } @@ -6130,8 +6139,9 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. - ReplicatedMergeTreeSink output(*this, metadata_snapshot, 0, 0, 0, false, false, false, query_context, - /*is_attach*/true); + ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, + /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, + /* majority_quorum */ false, query_context, /*is_attach*/true); for (size_t i = 0; i < loaded_parts.size(); ++i) { @@ -7046,7 +7056,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte continue; } else - throw Coordination::Exception("Unable to create a mutation znode", rc); + throw Coordination::Exception::fromMessage(rc, "Unable to create a mutation znode"); } merge_selecting_task->schedule(); @@ -8169,6 +8179,9 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti if (action_type == ActionLocks::PartsMove) return parts_mover.moves_blocker.cancel(); + if (action_type == ActionLocks::PullReplicationLog) + return queue.pull_log_blocker.cancel(); + return {}; } @@ -8666,7 +8679,7 @@ void StorageReplicatedMergeTree::createTableSharedID() const } else if (code != Coordination::Error::ZOK) { - throw zkutil::KeeperException(code, zookeeper_table_id_path); + throw zkutil::KeeperException::fromPath(code, zookeeper_table_id_path); } } @@ -9109,7 +9122,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(ec, zookeeper_part_replica_node); + throw zkutil::KeeperException::fromPath(ec, zookeeper_part_replica_node); } } @@ -9144,7 +9157,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } @@ -9176,7 +9189,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } } else @@ -9503,7 +9516,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } } - MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); + auto [new_data_part, tmp_dir_holder] = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); new_data_part->setName(lost_part_name); try @@ -9820,6 +9833,7 @@ void StorageReplicatedMergeTree::backupData( /// because we need to coordinate them with other replicas (other replicas can have better parts). const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -9828,7 +9842,7 @@ void StorageReplicatedMergeTree::backupData( else data_parts = getVisibleDataPartsVector(local_context); - auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); String shared_id = getTableSharedID(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 78ef39f032f..8e9eed678c8 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -497,6 +497,7 @@ private: BackgroundSchedulePool::TaskHolder queue_updating_task; BackgroundSchedulePool::TaskHolder mutations_updating_task; + Coordination::WatchCallbackPtr mutations_watch_callback; /// A task that selects parts to merge. BackgroundSchedulePool::TaskHolder merge_selecting_task; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index b52150250b8..9c8a3860807 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -41,6 +41,7 @@ #include #include +#include #include #include @@ -245,7 +246,7 @@ private: if (!outcome.IsSuccess()) { - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } @@ -513,22 +514,12 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() return pimpl->next(); } -Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageS3Source::StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -536,21 +527,24 @@ StorageS3Source::StorageS3Source( const String & bucket_, const String & version_id_, std::shared_ptr file_iterator_, - const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_), false) + const size_t download_thread_num_, + std::optional query_info_) + : ISource(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) , version_id(version_id_) , format(format_) - , columns_desc(columns_) + , columns_desc(info.columns_description) + , requested_columns(info.requested_columns) , max_block_size(max_block_size_) , request_settings(request_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , requested_virtual_columns(requested_virtual_columns_) + , query_info(std::move(query_info_)) + , requested_virtual_columns(info.requested_virtual_columns) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) @@ -582,6 +576,8 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() format, *read_buf, sample_block, getContext(), max_block_size, format_settings, std::nullopt, std::nullopt, /* is_remote_fs */ true, compression_method); + if (query_info.has_value()) + input_format->setQueryInfo(query_info.value(), getContext()); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -593,10 +589,17 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{key_with_info.key, bucket, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -699,7 +702,6 @@ Chunk StorageS3Source::generate() progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); const auto & file_path = reader.getPath(); - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -997,11 +999,6 @@ std::shared_ptr StorageS3::createFileIterator( } } -bool StorageS3::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageS3::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -1033,52 +1030,20 @@ Pipe StorageS3::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper = createFileIterator( query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback()); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, query_configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, query_configuration.request_settings, query_configuration.compression_method, @@ -1086,7 +1051,8 @@ Pipe StorageS3::read( query_configuration.url.bucket, query_configuration.url.version_id, iterator_wrapper, - max_download_threads)); + max_download_threads, + query_info)); } return Pipe::unitePipes(std::move(pipes)); @@ -1195,7 +1161,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast(err.GetErrorType())), err.GetMessage()); + throw S3Exception(err.GetMessage(), err.GetErrorType()); } for (const auto & error : response.GetResult().GetErrors()) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index d001a86842e..f0486a8a0b0 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include namespace Aws::S3 { @@ -115,16 +117,12 @@ public: ReadTaskCallback callback; }; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format, String name_, - const Block & sample_block, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -132,7 +130,8 @@ public: const String & bucket, const String & version_id, std::shared_ptr file_iterator_, - size_t download_thread_num); + size_t download_thread_num, + std::optional query_info); ~StorageS3Source() override; @@ -141,28 +140,34 @@ public: Chunk generate() override; private: + friend class StorageS3QueueSource; + String name; String bucket; String version_id; String format; ColumnsDescription columns_desc; + NamesAndTypesList requested_columns; UInt64 max_block_size; S3Settings::RequestSettings request_settings; String compression_hint; std::shared_ptr client; Block sample_block; std::optional format_settings; + std::optional query_info; struct ReaderHolder { public: ReaderHolder( - String path_, + String key_, + String bucket_, std::unique_ptr read_buf_, std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) - : path(std::move(path_)) + : key(std::move(key_)) + , bucket(std::move(bucket_)) , read_buf(std::move(read_buf_)) , input_format(std::move(input_format_)) , pipeline(std::move(pipeline_)) @@ -187,19 +192,22 @@ private: pipeline = std::move(other.pipeline); input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); - path = std::move(other.path); + key = std::move(other.key); + bucket = std::move(other.bucket); return *this; } explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getPath() const { return path; } + String getPath() const { return fs::path(bucket) / key; } + const String & getFile() const { return key; } const IInputFormat * getInputFormat() const { return input_format.get(); } private: - String path; + String key; + String bucket; std::unique_ptr read_buf; std::shared_ptr input_format; std::unique_ptr pipeline; @@ -208,7 +216,7 @@ private: ReaderHolder reader; - std::vector requested_virtual_columns; + NamesAndTypesList requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; @@ -323,6 +331,7 @@ protected: private: friend class StorageS3Cluster; friend class TableFunctionS3Cluster; + friend class StorageS3Queue; Configuration configuration; std::mutex configuration_update_mutex; @@ -350,7 +359,7 @@ private: const std::optional & format_settings, ContextPtr ctx); - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 5c2229875e5..4b4558f6330 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + protected: void updateConfigurationIfChanged(ContextPtr local_context); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 581665a7dc5..e3d577ca0b3 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -77,6 +77,8 @@ struct S3Settings const PartUploadSettings & getUploadSettings() const { return upload_settings; } + void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } + RequestSettings() = default; explicit RequestSettings(const Settings & settings); explicit RequestSettings(const NamedCollection & collection); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d5ae6f2383f..d5db5763da9 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -91,6 +91,7 @@ Pipe StorageSQLite::read( column_names, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "", remote_table_name, context_); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d8bbd523cbf..83336cbd22e 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -426,7 +426,7 @@ void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont num_indices_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -544,9 +544,10 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// data.bin @@ -576,7 +577,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index db8cb6b42de..617b421fa24 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -22,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -208,33 +209,30 @@ void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credenti } } -Block StorageURLSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageURLSource::StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, size_t download_threads, + const SelectQueryInfo & query_info, const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_), false), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + : ISource(info.source_header, false) + , name(std::move(name_)) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) + , uri_iterator(uri_iterator_) { auto headers = getHeaders(headers_); @@ -281,7 +279,7 @@ StorageURLSource::StorageURLSource( input_format = FormatFactory::instance().getInput( format, *read_buf, - sample_block, + block_for_format, context, max_block_size, format_settings, @@ -289,12 +287,25 @@ StorageURLSource::StorageURLSource( /*max_download_threads*/ std::nullopt, /* is_remote_fs */ true, compression_method); + input_format->setQueryInfo(query_info, context); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); - builder.addSimpleTransform([&](const Block & cur_header) - { return std::make_shared(cur_header, columns, *input_format, context); }); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(cur_header, columns_description, *input_format, context); + }); + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -371,7 +382,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding); + auto request_uri = Poco::URI(*option, context->getSettingsRef().enable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); @@ -695,27 +706,6 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - size_t max_download_threads = local_context->getSettingsRef().max_download_threads; std::shared_ptr iterator_wrapper{nullptr}; @@ -759,6 +749,8 @@ Pipe IStorageURLBase::read( num_streams = 1; } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + Pipes pipes; pipes.reserve(num_streams); @@ -766,20 +758,25 @@ Pipe IStorageURLBase::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback( + read_from_format_info.columns_description.getNamesOfPhysical(), + read_from_format_info.columns_description, + query_info, + local_context, + processed_stage, + max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, download_threads, + query_info, headers, params, is_url_with_globs)); @@ -798,19 +795,6 @@ Pipe StorageURLWithFailover::read( size_t max_block_size, size_t /*num_streams*/) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); auto iterator_wrapper = std::make_shared([&, done = false]() mutable @@ -821,21 +805,22 @@ Pipe StorageURLWithFailover::read( return uri_options; }); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + auto pipe = Pipe(std::make_shared( - std::vector{}, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, local_context->getSettingsRef().max_download_threads, + query_info, headers, params)); std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 68fd4014ac1..140f3d42f7b 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -158,20 +159,19 @@ public: using IteratorWrapper = std::function; StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, size_t download_threads, + const SelectQueryInfo & query_info, const HTTPHeaderEntries & headers_ = {}, const URIParams & params = {}, bool glob_url = false); @@ -182,8 +182,6 @@ public: static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - static std::pair> getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, @@ -202,7 +200,10 @@ private: InitializeFunc initialize; String name; - std::vector requested_virtual_columns; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; + Block block_for_format; std::shared_ptr uri_iterator; Poco::URI curr_uri; @@ -271,6 +272,8 @@ public: return storage_snapshot->metadata->getSampleBlock(); } + bool supportsSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 67771416771..bd475d78f65 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index b532d1c91f0..1715cde9d1e 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -79,6 +79,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( column_names, columns_description.getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, local_context); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 63b9a443f95..e09b27adf32 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -30,6 +32,8 @@ class IStorageSystemOneBlock : public IStorage protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual bool supportsColumnsMask() const { return false; } + public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { @@ -48,8 +52,15 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + + if (supportsColumnsMask()) + { + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + query_info.columns_mask = std::move(columns_mask); + sample_block = std::move(header); + } + MutableColumns res_columns = sample_block.cloneEmptyColumns(); fillData(res_columns, context, query_info); diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 4e7a25d7726..796b134ba56 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -8,7 +8,6 @@ const char * auto_config_build[] "SYSTEM", "@CMAKE_SYSTEM_NAME@", "VERSION_GITHASH", "@VERSION_GITHASH@", "VERSION_REVISION", "@VERSION_REVISION@", - "VERSION_DATE", "@VERSION_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", "CMAKE_VERSION", "@CMAKE_VERSION@", diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index f4ef52d7605..39a61f22b89 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -32,6 +32,12 @@ NamesAndTypesList StorageSystemClusters::getNamesAndTypes() }; } +NamesAndAliases StorageSystemClusters::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "cluster"}, + }; +} void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 9aa1a6a5183..071ad423b89 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -22,6 +22,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; using NameAndCluster = std::pair>; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index e4ca6a15138..b76ad07abeb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -315,23 +316,9 @@ Pipe StorageSystemColumns::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); Block block_to_filter; Storages storages; diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 67867b6c577..bae7a266dcd 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -290,7 +290,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw Coordination::Exception(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); + throw Coordination::Exception::fromPath(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); } /// Process active nodes @@ -322,7 +322,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw Coordination::Exception(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); + throw Coordination::Exception::fromPath(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); } /// Process the rest hosts diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2649cf71182..0c4eb197efd 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -185,21 +186,9 @@ Pipe StorageSystemDataSkippingIndices::read( size_t /* num_streams */) { storage_snapshot->check(column_names); - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); MutableColumnPtr column = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2fcc91e49bb..1fa94fab7bf 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -117,13 +117,23 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c const auto & database = databases.at(database_name); - res_columns[0]->insert(database_name); - res_columns[1]->insert(database->getEngineName()); - res_columns[2]->insert(context->getPath() + database->getDataPath()); - res_columns[3]->insert(database->getMetadataPath()); - res_columns[4]->insert(database->getUUID()); - res_columns[5]->insert(getEngineFull(context, database)); - res_columns[6]->insert(database->getDatabaseComment()); + size_t src_index = 0; + size_t res_index = 0; + const auto & columns_mask = query_info.columns_mask; + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getEngineName()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(context->getPath() + database->getDataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getMetadataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getUUID()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(getEngineFull(context, database)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getDatabaseComment()); } } diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 37c5f97d497..29dd786ca0a 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -26,6 +26,8 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; + bool supportsColumnsMask() const override { return true; } + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 97af4094e42..c5d2ba94e09 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -81,13 +82,11 @@ struct WorkerState class DetachedPartsSource : public ISource { public: - DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, - bool has_bytes_on_disk_column_) + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_) : ISource(std::move(header_)) , state(state_) , columns_mask(std::move(columns_mask_)) , block_size(block_size_) - , has_bytes_on_disk_column(has_bytes_on_disk_column_) {} String getName() const override { return "DataPartsSource"; } @@ -127,7 +126,6 @@ private: std::shared_ptr state; const std::vector columns_mask; const UInt64 block_size; - const bool has_bytes_on_disk_column; const size_t support_threads = 35; StoragesInfo current_info; @@ -149,9 +147,6 @@ private: void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { - if (!has_bytes_on_disk_column) - return; - WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) @@ -211,7 +206,9 @@ private: auto begin = detached_parts.size() - rows; std::vector> parts_sizes(rows); - calculatePartSizeOnDisk(begin, parts_sizes); + constexpr size_t bytes_on_disk_col_idx = 4; + if (columns_mask[bytes_on_disk_col_idx]) + calculatePartSizeOnDisk(begin, parts_sizes); for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { @@ -229,7 +226,7 @@ private: new_columns[res_index++]->insert(p.dir_name); if (columns_mask[src_index++]) { - chassert(has_bytes_on_disk_column); + chassert(src_index - 1 == bytes_on_disk_col_idx); size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); new_columns[res_index++]->insert(bytes_on_disk); } @@ -285,21 +282,7 @@ Pipe StorageSystemDetachedParts::read( storage_snapshot->check(column_names); Block sample_block = storage_snapshot->metadata->getSampleBlock(); - NameSet names_set(column_names.begin(), column_names.end()); - - Block header; - std::vector columns_mask(sample_block.columns()); - - for (size_t i = 0; i < columns_mask.size(); ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } - - bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto state = std::make_shared(StoragesInfoStream(query_info, context)); @@ -307,7 +290,7 @@ Pipe StorageSystemDetachedParts::read( for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp new file mode 100644 index 00000000000..eb7d84603c0 --- /dev/null +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -0,0 +1,175 @@ +#include "config.h" + +#if USE_RDKAFKA + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "base/types.h" + +namespace DB +{ + +NamesAndTypesList StorageSystemKafkaConsumers::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"consumer_id", std::make_shared()}, //(number? or string? - single clickhouse table can have many consumers) + {"assignments.topic", std::make_shared(std::make_shared())}, + {"assignments.partition_id", std::make_shared(std::make_shared())}, + {"assignments.current_offset", std::make_shared(std::make_shared())}, + {"exceptions.time", std::make_shared(std::make_shared())}, + {"exceptions.text", std::make_shared(std::make_shared())}, + {"last_poll_time", std::make_shared()}, + {"num_messages_read", std::make_shared()}, + {"last_commit_time", std::make_shared()}, + {"num_commits", std::make_shared()}, + {"last_rebalance_time", std::make_shared()}, + {"num_rebalance_revocations", std::make_shared()}, + {"num_rebalance_assignments", std::make_shared()}, + {"is_currently_used", std::make_shared()}, + {"rdkafka_stat", std::make_shared()}, + }; + return names_and_types; +} + +void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); + + size_t index = 0; + + + auto & database = assert_cast(*res_columns[index++]); + auto & table = assert_cast(*res_columns[index++]); + auto & consumer_id = assert_cast(*res_columns[index++]); //(number? or string? - single clickhouse table can have many consumers) + + auto & assigments_topics = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_topics_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & assigments_partition_id = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_partition_id_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & assigments_current_offset = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & assigments_current_offset_offsets = assert_cast(*res_columns[index++]).getOffsets(); + + auto & exceptions_time = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & exceptions_time_offset = assert_cast(*res_columns[index++]).getOffsets(); + auto & exceptions_text = assert_cast(assert_cast(*res_columns[index]).getData()); + auto & exceptions_text_offset = assert_cast(*res_columns[index++]).getOffsets(); + auto & last_poll_time = assert_cast(*res_columns[index++]); + auto & num_messages_read = assert_cast(*res_columns[index++]); + auto & last_commit_time = assert_cast(*res_columns[index++]); + auto & num_commits = assert_cast(*res_columns[index++]); + auto & last_rebalance_time = assert_cast(*res_columns[index++]); + auto & num_rebalance_revocations = assert_cast(*res_columns[index++]); + auto & num_rebalance_assigments = assert_cast(*res_columns[index++]); + auto & is_currently_used = assert_cast(*res_columns[index++]); + auto & rdkafka_stat = assert_cast(*res_columns[index++]); + + const auto access = context->getAccess(); + size_t last_assignment_num = 0; + size_t exceptions_num = 0; + + auto add_row = [&](const DatabaseTablesIteratorPtr & it, StorageKafka * storage_kafka_ptr) + { + if (!access->isGranted(AccessType::SHOW_TABLES, it->databaseName(), it->name())) + { + return; + } + + std::string database_str = it->databaseName(); + std::string table_str = it->name(); + + auto safe_consumers = storage_kafka_ptr->getSafeConsumers(); + + for (const auto & weak_consumer : safe_consumers.consumers) + { + if (auto consumer = weak_consumer.lock()) + { + auto consumer_stat = consumer->getStat(); + + database.insertData(database_str.data(), database_str.size()); + table.insertData(table_str.data(), table_str.size()); + + consumer_id.insertData(consumer_stat.consumer_id.data(), consumer_stat.consumer_id.size()); + + const auto num_assignnemts = consumer_stat.assignments.size(); + + for (size_t num = 0; num < num_assignnemts; ++num) + { + const auto & assign = consumer_stat.assignments[num]; + + assigments_topics.insertData(assign.topic_str.data(), assign.topic_str.size()); + + assigments_partition_id.insert(assign.partition_id); + assigments_current_offset.insert(assign.current_offset); + } + last_assignment_num += num_assignnemts; + + assigments_topics_offsets.push_back(last_assignment_num); + assigments_partition_id_offsets.push_back(last_assignment_num); + assigments_current_offset_offsets.push_back(last_assignment_num); + + for (const auto & exc : consumer_stat.exceptions_buffer) + { + exceptions_text.insertData(exc.text.data(), exc.text.size()); + exceptions_time.insert(exc.timestamp_usec); + } + exceptions_num += consumer_stat.exceptions_buffer.size(); + exceptions_text_offset.push_back(exceptions_num); + exceptions_time_offset.push_back(exceptions_num); + + + last_poll_time.insert(consumer_stat.last_poll_time); + num_messages_read.insert(consumer_stat.num_messages_read); + last_commit_time.insert(consumer_stat.last_commit_timestamp_usec); + num_commits.insert(consumer_stat.num_commits); + last_rebalance_time.insert(consumer_stat.last_rebalance_timestamp_usec); + + num_rebalance_revocations.insert(consumer_stat.num_rebalance_revocations); + num_rebalance_assigments.insert(consumer_stat.num_rebalance_assignments); + + is_currently_used.insert(consumer_stat.in_use); + + rdkafka_stat.insertData(consumer_stat.rdkafka_stat.data(), consumer_stat.rdkafka_stat.size()); + } + } + }; + + const bool show_tables_granted = access->isGranted(AccessType::SHOW_TABLES); + + if (show_tables_granted) + { + auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & db : databases) + { + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) + { + StoragePtr storage = iterator->table(); + if (auto * kafka_table = dynamic_cast(storage.get())) + { + add_row(iterator, kafka_table); + } + } + } + + } +} + +} + +#endif diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h new file mode 100644 index 00000000000..eda3a39bc7e --- /dev/null +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -0,0 +1,27 @@ +#pragma once + +#include "config.h" + +#if USE_RDKAFKA + + +#include + + +namespace DB +{ + +class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemKafkaConsumers"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; +}; + +} + +#endif diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 1f32a0ff700..2dbe2964eb9 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -20,6 +20,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes() {"source_part_paths", std::make_shared(std::make_shared())}, {"result_part_path", std::make_shared()}, {"partition_id", std::make_shared()}, + {"partition", std::make_shared()}, {"is_mutation", std::make_shared()}, {"total_size_bytes_compressed", std::make_shared()}, {"total_size_bytes_uncompressed", std::make_shared()}, @@ -58,6 +59,7 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr cont res_columns[i++]->insert(merge.source_part_paths); res_columns[i++]->insert(merge.result_part_path); res_columns[i++]->insert(merge.partition_id); + res_columns[i++]->insert(merge.partition); res_columns[i++]->insert(merge.is_mutation); res_columns[i++]->insert(merge.total_size_bytes_compressed); res_columns[i++]->insert(merge.total_size_bytes_uncompressed); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 0979b9d9371..513af6cfc46 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -254,21 +255,10 @@ Pipe StorageSystemPartsBase::read( StoragesInfoStream stream(query_info, context); /// Create the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample.columns()); - for (size_t i = 0; i < sample.columns(); ++i) - { - if (names_set.contains(sample.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 288e4fd52a0..03757101ddf 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -50,7 +50,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[3]->insert(key.is_shared); res_columns[4]->insert(key.is_compressed); res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast->getTreeHash().first); + res_columns[6]->insert(key.ast->getTreeHash().low64); } } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index cf5a24f88cd..8fcd0fd7cf5 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ NamesAndTypesList StorageSystemRoleGrants::getNamesAndTypes() {"user_name", std::make_shared(std::make_shared())}, {"role_name", std::make_shared(std::make_shared())}, {"granted_role_name", std::make_shared()}, + {"granted_role_id", std::make_shared()}, {"granted_role_is_default", std::make_shared()}, {"with_admin_option", std::make_shared()}, }; @@ -45,12 +47,14 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr auto & column_role_name = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn()); auto & column_role_name_null_map = assert_cast(*res_columns[column_index++]).getNullMapData(); auto & column_granted_role_name = assert_cast(*res_columns[column_index++]); + auto & column_granted_role_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_is_default = assert_cast(*res_columns[column_index++]).getData(); auto & column_admin_option = assert_cast(*res_columns[column_index++]).getData(); auto add_row = [&](const String & grantee_name, AccessEntityType grantee_type, const String & granted_role_name, + const UUID & granted_role_id, bool is_default, bool with_admin_option) { @@ -72,6 +76,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr assert(false); column_granted_role_name.insertData(granted_role_name.data(), granted_role_name.length()); + column_granted_role_id.push_back(granted_role_id.toUnderType()); column_is_default.push_back(is_default); column_admin_option.push_back(with_admin_option); }; @@ -90,7 +95,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr continue; bool is_default = !default_roles || default_roles->match(role_id); - add_row(grantee_name, grantee_type, *role_name, is_default, element.admin_option); + add_row(grantee_name, grantee_type, *role_name, role_id, is_default, element.admin_option); } } }; diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 887d7f2a5d4..8d703632c68 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -275,15 +275,6 @@ Pipe StorageSystemStackTrace::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - } - } - bool send_signal = names_set.contains("trace") || names_set.contains("query_id"); bool read_thread_names = names_set.contains("thread_name"); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e00d2d95568..715c98ee92a 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,22 @@ static ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filte return block.getByPosition(0).column; } +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +static bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + class TablesBlockSource : public ISource { @@ -265,6 +282,8 @@ protected: if (!tables_it || !tables_it->isValid()) tables_it = database->getTablesIterator(context); + const bool need_table = needTable(database, getPort().getHeader()); + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) { auto table_name = tables_it->name(); @@ -274,23 +293,27 @@ protected: if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - StoragePtr table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - + StoragePtr table = nullptr; TableLockHolder lock; - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) + if (need_table) { - lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) continue; - } + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), + context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } ++rows_count; size_t src_index = 0; @@ -307,6 +330,7 @@ protected: if (columns_mask[src_index++]) { + chassert(table != nullptr); res_columns[res_index++]->insert(table->getName()); } @@ -396,7 +420,9 @@ protected: else src_index += 3; - StorageMetadataPtr metadata_snapshot = table->getInMemoryMetadataPtr(); + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); ASTPtr expression_ptr; if (columns_mask[src_index++]) @@ -433,7 +459,7 @@ protected: if (columns_mask[src_index++]) { - auto policy = table->getStoragePolicy(); + auto policy = table ? table->getStoragePolicy() : nullptr; if (policy) res_columns[res_index++]->insert(policy->getName()); else @@ -444,7 +470,7 @@ protected: settings.select_sequential_consistency = 0; if (columns_mask[src_index++]) { - auto total_rows = table->totalRows(settings); + auto total_rows = table ? table->totalRows(settings) : std::nullopt; if (total_rows) res_columns[res_index++]->insert(*total_rows); else @@ -489,7 +515,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_rows = table->lifetimeRows(); + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; if (lifetime_rows) res_columns[res_index++]->insert(*lifetime_rows); else @@ -498,7 +524,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_bytes = table->lifetimeBytes(); + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; if (lifetime_bytes) res_columns[res_index++]->insert(*lifetime_bytes); else @@ -587,23 +613,9 @@ Pipe StorageSystemTables::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block res_block; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - res_block.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); ColumnPtr filtered_databases_column = getFilteredDatabases(query_info, context); ColumnPtr filtered_tables_column = getFilteredTables(query_info.query, filtered_databases_column, context); diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 12cef89b553..074a648d235 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -27,7 +27,6 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); if (database.getEngineName() != "Memory") return; - bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; if (query.empty()) @@ -42,13 +41,18 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d assert(view_name == ast_create.getTable()); ast_create.attach = false; ast_create.setDatabase(database.getDatabaseName()); - if (is_uppercase) - ast_create.setTable(Poco::toUpper(view_name)); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; - database.createTable(context, ast_create.getTable(), view, ast); + ASTPtr ast_upper = ast_create.clone(); + auto & ast_create_upper = ast_upper->as(); + ast_create_upper.setTable(Poco::toUpper(view_name)); + StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), + database.getTableDataPath(ast_create_upper), context, true).second; + + database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); + } catch (...) { diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index f0c67e0f787..b770f784c01 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -84,6 +84,10 @@ #include #include +#if USE_RDKAFKA +#include +#endif + #ifdef OS_LINUX #include #endif @@ -144,6 +148,9 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "backups"); attach(context, system_database, "schema_inference_cache"); attach(context, system_database, "dropped_tables"); +#if USE_RDKAFKA + attach(context, system_database, "kafka_consumers"); +#endif #ifdef OS_LINUX attach(context, system_database, "stack_trace"); #endif diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp new file mode 100644 index 00000000000..c29ccb590ed --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names) +{ + std::vector columns_mask(sample_block.columns()); + Block header; + + NameSet names_set(column_names.begin(), column_names.end()); + for (size_t i = 0; i < columns_mask.size(); ++i) + { + if (names_set.contains(sample_block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(sample_block.getByPosition(i)); + } + } + + return std::make_pair(columns_mask, header); +} + +} diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.h b/src/Storages/System/getQueriedColumnsMaskAndHeader.h new file mode 100644 index 00000000000..0781a92fa60 --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.h @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names); + +} diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 9929b5bb39b..74f2709f458 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -232,8 +232,8 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, ContextMutablePtr & mutable_context, size_t subquery_depth) { - auto subquery_hash = subquery_node->getTreeHash(); - String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); + const auto subquery_hash = subquery_node->getTreeHash(); + const auto temporary_table_name = fmt::format("_data_{}", toString(subquery_hash)); const auto & external_tables = mutable_context->getExternalTables(); auto external_table_it = external_tables.find(temporary_table_name); diff --git a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp index 9a5a68f09cc..0f2ce8a2e44 100644 --- a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp +++ b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 27ff4345b44..ebd02f424fa 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -38,7 +38,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( if (shard_info.isLocal()) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_func_ptr, context); - return table_function_ptr->getActualTableStructure(context); + return table_function_ptr->getActualTableStructure(context, /*is_insert_query*/ true); } auto table_func_name = queryToString(table_func_ptr); diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp new file mode 100644 index 00000000000..6be4213ec6b --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -0,0 +1,78 @@ +#include +#include + +namespace DB +{ + +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +{ + ReadFromFormatInfo info; + /// Collect requested virtual columns and remove them from requested columns. + Strings columns_to_read; + for (const auto & column_name : requested_columns) + { + bool is_virtual = false; + for (const auto & virtual_column : virtuals) + { + if (column_name == virtual_column.name) + { + info.requested_virtual_columns.push_back(virtual_column); + is_virtual = true; + break; + } + } + + if (!is_virtual) + columns_to_read.push_back(column_name); + } + + /// Create header for Source that will contain all requested columns including virtual columns at the end + /// (because they will be added to the chunk after reading regular columns). + info.source_header = storage_snapshot->getSampleBlockForColumns(columns_to_read); + for (const auto & requested_virtual_column : info.requested_virtual_columns) + info.source_header.insert({requested_virtual_column.type->createColumn(), requested_virtual_column.type, requested_virtual_column.name}); + + /// Set requested columns that should be read from data. + info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(), columns_to_read); + + if (supports_subset_of_columns) + { + /// If only virtual columns were requested, just read the smallest column. + if (columns_to_read.empty()) + { + columns_to_read.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + } + /// We need to replace all subcolumns with their nested columns (e.g `a.b`, `a.b.c`, `x.y` -> `a`, `x`), + /// because most formats cannot extract subcolumns on their own. + /// All requested subcolumns will be extracted after reading. + else + { + std::unordered_set columns_to_read_set; + /// Save original order of columns. + std::vector new_columns_to_read; + for (const auto & column_to_read : info.requested_columns) + { + auto name = column_to_read.getNameInStorage(); + if (!columns_to_read_set.contains(name)) + { + columns_to_read_set.insert(name); + new_columns_to_read.push_back(name); + } + } + columns_to_read = std::move(new_columns_to_read); + } + info.columns_description = storage_snapshot->getDescriptionForColumns(columns_to_read); + } + /// If format doesn't support reading subset of columns, read all columns. + /// Requested columns/subcolumns will be extracted after reading. + else + { + info.columns_description = storage_snapshot->metadata->getColumns(); + } + + /// Create header for InputFormat with columns that will be read from the data. + info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical()); + return info; +} + +} diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h new file mode 100644 index 00000000000..c5f3959a550 --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace DB +{ + struct ReadFromFormatInfo + { + /// Header that will return Source from storage. + /// It contains all requested columns including virtual columns; + Block source_header; + /// Header that will be passed to IInputFormat to read data from file. + /// It can contain more columns than were requested if format doesn't support + /// reading subset of columns. + Block format_header; + /// Description of columns for format_header. Used for inserting defaults. + ColumnsDescription columns_description; + /// The list of requested columns without virtual columns. + NamesAndTypesList requested_columns; + /// The list of requested virtual columns. + NamesAndTypesList requested_virtual_columns; + }; + + /// Get all needed information for reading from data in some input format. + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index a4f1d963704..e5b1c8e8744 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,8 @@ void registerStorageS3(StorageFactory & factory); void registerStorageCOS(StorageFactory & factory); void registerStorageOSS(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); +void registerStorageS3Queue(StorageFactory & factory); + #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); #endif @@ -133,6 +135,7 @@ void registerStorages() registerStorageCOS(factory); registerStorageOSS(factory); registerStorageHudi(factory); + registerStorageS3Queue(factory); #if USE_PARQUET registerStorageDeltaLake(factory); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 5c1442ece11..749a154c19d 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -127,7 +127,8 @@ static void checkOld( std::string transformed_query = transformQueryForExternalDatabase( query_info, query_info.syntax_analyzer_result->requiredSourceColumns(), - state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } @@ -180,7 +181,8 @@ static void checkNewAnalyzer( query_info.table_expression = findTableExpression(query_node->getJoinTree(), "table"); std::string transformed_query = transformQueryForExternalDatabase( - query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 375510e62bf..84a696a1e9c 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -258,6 +258,7 @@ String transformQueryForExternalDatabaseImpl( Names used_columns, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -337,7 +338,8 @@ String transformQueryForExternalDatabaseImpl( IAST::FormatSettings settings( out, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None, - /*identifier_quoting_style*/ identifier_quoting_style); + /*identifier_quoting_style*/ identifier_quoting_style, /*show_secrets_*/ true, + /*literal_escaping_style*/ literal_escaping_style); select->format(settings); @@ -351,6 +353,7 @@ String transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -375,6 +378,7 @@ String transformQueryForExternalDatabase( column_names, available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); @@ -386,6 +390,7 @@ String transformQueryForExternalDatabase( query_info.syntax_analyzer_result->requiredSourceColumns(), available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); diff --git a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index 0f2b0a5822f..fb6af21907e 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -31,6 +31,7 @@ String transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index c9e5c66fe4a..b02a0e79f9c 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -6,16 +6,18 @@ if (TARGET ch_contrib::hivemetastore) add_headers_and_sources(clickhouse_table_functions Hive) endif () -list(REMOVE_ITEM clickhouse_table_functions_sources +extract_into_parent_list(clickhouse_table_functions_sources dbms_sources ITableFunction.cpp TableFunctionView.cpp - TableFunctionFactory.cpp) -list(REMOVE_ITEM clickhouse_table_functions_headers + TableFunctionFactory.cpp +) +extract_into_parent_list(clickhouse_table_functions_headers dbms_headers ITableFunction.h TableFunctionView.h - TableFunctionFactory.h) + TableFunctionFactory.h +) -add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) +add_library(clickhouse_table_functions ${clickhouse_table_functions_headers} ${clickhouse_table_functions_sources}) target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index fb7635181dc..ebebee13092 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -49,13 +49,14 @@ namespace DB actual_columns = parseColumnsListFromString(table_structure, context_); } - ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/) const { return actual_columns; } + ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/, bool /*is_insert_query*/) const { return actual_columns; } StoragePtr TableFunctionHive::executeImpl( const ASTPtr & /*ast_function_*/, ContextPtr context_, const std::string & table_name_, - ColumnsDescription /*cached_columns_*/) const + ColumnsDescription /*cached_columns_*/, + bool /*is_insert_query*/) const { const Settings & settings = context_->getSettings(); ParserExpression partition_by_parser; diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h index ec09a87a876..5e48be46ce1 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.h +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -17,10 +17,10 @@ public: bool hasStaticStructure() const override { return true; } StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return storage_type_name; } - ColumnsDescription getActualTableStructure(ContextPtr) const override; + ColumnsDescription getActualTableStructure(ContextPtr, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; private: diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index df19e0ebad3..137e1dc27fe 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -34,15 +34,15 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte auto context_to_use = use_global_context ? context->getGlobalContext() : context; if (cached_columns.empty()) - return executeImpl(ast_function, context, table_name, std::move(cached_columns)); + return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context)) - return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns)); + if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); auto get_storage = [=]() -> StoragePtr { - return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns); + return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns, is_insert_query); }; /// It will request actual table structure and create underlying storage lazily diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index fe71005cb9c..028bbb99c15 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -58,7 +58,7 @@ public: virtual void parseArguments(const ASTPtr & /*ast_function*/, ContextPtr /*context*/) {} /// Returns actual table structure probably requested from remote server, may fail - virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/) const = 0; + virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/, bool is_insert_query) const = 0; /// Check if table function needs a structure hint from SELECT query in case of /// INSERT INTO FUNCTION ... SELECT ... and INSERT INTO ... SELECT ... FROM table_function(...) @@ -89,7 +89,7 @@ protected: private: virtual StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const = 0; virtual const char * getStorageTypeName() const = 0; }; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index a8329684ee6..7e81d6d21b7 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; + extern const int CLUSTER_DOESNT_EXIST; extern const int LOGICAL_ERROR; } @@ -59,7 +59,7 @@ protected: cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is args.erase(args.begin()); diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index f87838cfb56..6d50e9138ff 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -26,7 +26,8 @@ protected: const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const override + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const override { ColumnsDescription columns; if (TableFunction::configuration.structure != "auto") @@ -42,7 +43,7 @@ protected: const char * getStorageTypeName() const override { return Storage::name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override { if (TableFunction::configuration.structure == "auto") { diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index a60ab70d570..487826dc363 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -110,7 +110,7 @@ void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const S } } -StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 8300cc27591..902c7b01c4d 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -42,13 +42,14 @@ protected: virtual String getFormatFromFirstArgument(); String filename; + String path_to_archive; String format = "auto"; String structure = "auto"; String compression_method = "auto"; ColumnsDescription structure_hint; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; virtual StoragePtr getStorage( const String & source, const String & format, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index 1fb0f392e33..59702259b35 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -61,7 +61,7 @@ void ITableFunctionXDBC::startBridgeIfNot(ContextPtr context) const } } -ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context) const +ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { startBridgeIfNot(context); @@ -92,10 +92,10 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex return ColumnsDescription{columns}; } -StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { startBridgeIfNot(context); - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( StorageID(getDatabaseName(), table_name), schema_name, remote_table_name, columns, ConstraintsDescription{}, String{}, context, helper); result->startup(); diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index 984a6a1957f..da0fa83033b 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -16,7 +16,7 @@ namespace DB class ITableFunctionXDBC : public ITableFunction { private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; /* A factory method to create bridge helper, that will assist in remote interaction */ virtual BridgeHelperPtr createBridgeHelper(ContextPtr context, @@ -24,7 +24,7 @@ private: const std::string & connection_string_, bool use_connection_pooling_) const = 0; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d2a96173491..a4ece2f6a07 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -39,15 +39,13 @@ namespace bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } -StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) +void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { - StorageAzureBlob::Configuration configuration; - /// Supported signatures: /// /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) @@ -59,87 +57,80 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.blobs_paths = {configuration.blob_path}; - if (configuration.format == "auto" && get_format_from_file) + if (configuration.format == "auto") configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } - - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool + else { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); - }; + if (engine_args.size() < 3 || engine_args.size() > 8) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + std::unordered_map engine_args_to_idx; + + configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + configuration.is_connection_string = isConnectionString(configuration.connection_url); + + configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + if (engine_args.size() == 4) { - configuration.format = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + } + else + { + configuration.structure = fourth_arg; + } } - else + else if (engine_args.size() == 5) { - configuration.structure = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) + else if (engine_args.size() == 6) { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); + if (is_format_arg(sixth_arg)) + configuration.format = sixth_arg; + else + configuration.structure = sixth_arg; + } } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format, compression and structure must be last arguments"); - } - else + else if (engine_args.size() == 7) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -148,17 +139,9 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } - } - else if (engine_args.size() == 8) - { - - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else + else if (engine_args.size() == 8) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -168,14 +151,12 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } + + configuration.blobs_paths = {configuration.blob_path}; + + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -190,19 +171,99 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, auto & args = args_func.at(0)->children; - configuration = parseArgumentsImpl(args, context); + parseArgumentsImpl(args, context); } -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context) const +void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + + auto structure_literal = std::make_shared(structure); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + + if (args.size() == 3) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + } + args.push_back(structure_literal); + } + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 7) + { + args.push_back(structure_literal); + } + else if (args.size() == 8) + { + args.back() = structure_literal; + } + } +} + +ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const { if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings)); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); } return parseColumnsListFromString(configuration.structure, context); @@ -213,9 +274,9 @@ bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns() return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); } -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); ColumnsDescription columns; @@ -234,6 +295,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct String{}, /// No format_settings for table function Azure std::nullopt, + /* distributed_processing */ false, nullptr); storage->startup(); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 0ac3f9771c7..83c4449fd17 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -13,13 +13,23 @@ namespace DB class Context; -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in AzureBlob. +/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. */ class TableFunctionAzureBlobStorage : public ITableFunction { public: static constexpr auto name = "azureBlobStorage"; - static constexpr auto signature = "- connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]\n"; + + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; static size_t getMaxNumberOfArguments() { return 8; } @@ -46,7 +56,9 @@ public: return {"_path", "_file"}; } - static StorageAzureBlob::Configuration parseArgumentsImpl(ASTs & args, const ContextPtr & context, bool get_format_from_file = true); + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: @@ -54,11 +66,12 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Azure"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageAzureBlob::Configuration configuration; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp new file mode 100644 index 00000000000..eee585967c2 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -0,0 +1,85 @@ +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + +#include "registerTableFunctions.h" + +#include + + +namespace DB +{ + +StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +{ + StoragePtr storage; + ColumnsDescription columns; + bool structure_argument_was_provided = configuration.structure != "auto"; + + if (structure_argument_was_provided) + { + columns = parseColumnsListFromString(configuration.structure, context); + } + else if (!structure_hint.empty()) + { + columns = structure_hint; + } + + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); + auto settings = StorageAzureBlob::createSettings(context); + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared( + configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */ true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared( + cluster_name, + configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context, + structure_argument_was_provided); + } + + storage->startup(); + + return storage; +} + + +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false} + ); +} + + +} + +#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h new file mode 100644 index 00000000000..58f79328f63 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h @@ -0,0 +1,55 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + + +namespace DB +{ + +class Context; + +/** + * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) + * A table function, which allows to process many files from Azure Blob Storage on a specific cluster + * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks + * in Azure Blob Storage file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. + */ +class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster +{ +public: + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + + String getName() const override + { + return name; + } + + String getSignature() const override + { + return signature; + } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } +}; + +} + +#endif diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index 90db9550a72..f0060acb411 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp +++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -43,7 +43,7 @@ void TableFunctionDictionary::parseArguments(const ASTPtr & ast_function, Contex dictionary_name = checkAndGetLiteralArgument(args[0], "dictionary_name"); } -ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { const ExternalDictionariesLoader & external_loader = context->getExternalDictionariesLoader(); std::string resolved_name = external_loader.resolveDictionaryName(dictionary_name, context->getCurrentDatabase()); @@ -76,10 +76,10 @@ ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr c } StoragePtr TableFunctionDictionary::executeImpl( - const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription) const + const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const { StorageID dict_id(getDatabaseName(), table_name); - auto dictionary_table_structure = getActualTableStructure(context); + auto dictionary_table_structure = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( dict_id, dictionary_name, std::move(dictionary_table_structure), String{}, StorageDictionary::Location::Custom, context); diff --git a/src/TableFunctions/TableFunctionDictionary.h b/src/TableFunctions/TableFunctionDictionary.h index cc184a32a17..d0beb292fe1 100644 --- a/src/TableFunctions/TableFunctionDictionary.h +++ b/src/TableFunctions/TableFunctionDictionary.h @@ -18,9 +18,9 @@ public: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Dictionary"; } diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 66e7ce7592c..5a64a988156 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -120,12 +120,12 @@ void TableFunctionExecutable::parseArguments(const ASTPtr & ast_function, Contex } } -ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto storage_id = StorageID(getDatabaseName(), table_name); auto global_context = context->getGlobalContext(); @@ -135,7 +135,7 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, if (settings_query != nullptr) settings.applyChanges(settings_query->as()->changes); - auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); + auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context, is_insert_query), ConstraintsDescription{}); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExecutable.h b/src/TableFunctions/TableFunctionExecutable.h index 2d9f86e14e6..aa595312fe4 100644 --- a/src/TableFunctions/TableFunctionExecutable.h +++ b/src/TableFunctions/TableFunctionExecutable.h @@ -24,11 +24,11 @@ public: bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Executable"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index aae93c8b596..f127979d92a 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -91,7 +91,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt query = std::move(explain_query); } -ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); ColumnsDescription columns_description; @@ -123,7 +123,7 @@ static Block executeMonoBlock(QueryPipeline & pipeline) } StoragePtr TableFunctionExplain::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { /// To support settings inside explain subquery. auto mutable_context = Context::createCopy(context); @@ -132,7 +132,7 @@ StoragePtr TableFunctionExplain::executeImpl( Block block = executeMonoBlock(blockio.pipeline); StorageID storage_id(getDatabaseName(), table_name); - auto storage = std::make_shared(storage_id, getActualTableStructure(context), std::move(block)); + auto storage = std::make_shared(storage_id, getActualTableStructure(context, is_insert_query), std::move(block)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExplain.h b/src/TableFunctions/TableFunctionExplain.h index 99d3e52ee68..2eb7e35d0b5 100644 --- a/src/TableFunctions/TableFunctionExplain.h +++ b/src/TableFunctions/TableFunctionExplain.h @@ -17,7 +17,7 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Explain"; } @@ -25,7 +25,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; InterpreterExplainQuery getInterpreter(ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 0e49f26db40..56a6839ddbb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); return; } @@ -39,6 +40,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr fd = STDOUT_FILENO; else if (filename == "stderr") fd = STDERR_FILENO; + else + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { @@ -76,22 +79,31 @@ StoragePtr TableFunctionFile::getStorage(const String & source, ConstraintsDescription{}, String{}, global_context->getSettingsRef().rename_files_after_processing, + path_to_archive, }; + if (fd >= 0) return std::make_shared(fd, args); return std::make_shared(source, global_context->getUserFilesPath(), args); } -ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { if (fd >= 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; - Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); - return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); + + Strings paths; + Strings paths_to_archives; + if (path_to_archive.empty()) + paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); + else + paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read); + + return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives); } diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 797948cad03..439ae87b4ae 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -20,7 +20,7 @@ public: return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 2a46f839bbe..eb7437a3943 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -52,7 +52,7 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(args[1], "structure"); } -ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -98,9 +98,9 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont return concatenateBlocks(blocks); } -StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block res_block = parseData(columns, context); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); diff --git a/src/TableFunctions/TableFunctionFormat.h b/src/TableFunctions/TableFunctionFormat.h index d64ab14cb64..e20e8b6ea4b 100644 --- a/src/TableFunctions/TableFunctionFormat.h +++ b/src/TableFunctions/TableFunctionFormat.h @@ -18,10 +18,10 @@ public: bool hasStaticStructure() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; Block parseData(ColumnsDescription columns, ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index 08059796660..c6a9154cc66 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -97,7 +97,7 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co } } -ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -113,9 +113,9 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - ColumnsDescription columns = getActualTableStructure(context); + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared( StorageID(getDatabaseName(), table_name), columns, String{}, max_array_length, max_string_length, random_seed); res->startup(); diff --git a/src/TableFunctions/TableFunctionGenerateRandom.h b/src/TableFunctions/TableFunctionGenerateRandom.h index 584d65311f4..a5d11ce0af6 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.h +++ b/src/TableFunctions/TableFunctionGenerateRandom.h @@ -19,10 +19,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "GenerateRandom"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure = "auto"; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 7aab55b48c9..a8e2108fda8 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -28,7 +28,7 @@ StoragePtr TableFunctionHDFS::getStorage( compression_method_); } -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index eec0a05fe8d..a7eb5daa440 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -34,7 +34,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index 4941241acae..658a55c6fc4 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -43,7 +43,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } -ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -58,9 +58,9 @@ ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr contex return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context)); + auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context, is_insert_query)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionInput.h b/src/TableFunctions/TableFunctionInput.h index 8e7b34cb829..3164ce43eef 100644 --- a/src/TableFunctions/TableFunctionInput.h +++ b/src/TableFunctions/TableFunctionInput.h @@ -20,10 +20,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Input"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure; diff --git a/src/TableFunctions/TableFunctionMeiliSearch.cpp b/src/TableFunctions/TableFunctionMeiliSearch.cpp index 41ae5bb9ee2..01840a80262 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.cpp +++ b/src/TableFunctions/TableFunctionMeiliSearch.cpp @@ -8,13 +8,13 @@ namespace DB { StoragePtr TableFunctionMeiliSearch::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { return std::make_shared( StorageID(getDatabaseName(), table_name), configuration.value(), ColumnsDescription{}, ConstraintsDescription{}, String{}); } -ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageMeiliSearch::getTableStructureFromData(configuration.value()); } diff --git a/src/TableFunctions/TableFunctionMeiliSearch.h b/src/TableFunctions/TableFunctionMeiliSearch.h index 86be944ab12..a127809a9c5 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.h +++ b/src/TableFunctions/TableFunctionMeiliSearch.h @@ -13,11 +13,11 @@ public: private: StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "meilisearch"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 586cee54085..599953a1add 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -118,7 +118,7 @@ const TableFunctionMerge::DBToTableSetMap & TableFunctionMerge::getSourceDatabas return *source_databases_and_tables; } -ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { for (const auto & db_with_tables : getSourceDatabasesAndTables(context)) { @@ -134,11 +134,11 @@ ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr contex } -StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, is_insert_query), String{}, source_database_name_or_regexp, database_is_regexp, diff --git a/src/TableFunctions/TableFunctionMerge.h b/src/TableFunctions/TableFunctionMerge.h index 3439056deda..8cc5119978a 100644 --- a/src/TableFunctions/TableFunctionMerge.h +++ b/src/TableFunctions/TableFunctionMerge.h @@ -17,13 +17,13 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Merge"; } using TableSet = std::set; using DBToTableSetMap = std::map; const DBToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static TableSet getMatchedTablesWithAccess(const String & database_name, const String & table_regexp, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index 31dd64f8254..5c7c1d98cdf 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -27,9 +27,9 @@ namespace ErrorCodes StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto storage = std::make_shared( StorageID(configuration->database, table_name), configuration->host, @@ -46,7 +46,7 @@ StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & /*ast_function*/, return storage; } -ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionMongoDB.h b/src/TableFunctions/TableFunctionMongoDB.h index b5033b2d654..c2c15cabe5a 100644 --- a/src/TableFunctions/TableFunctionMongoDB.h +++ b/src/TableFunctions/TableFunctionMongoDB.h @@ -17,11 +17,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MongoDB"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 1705dfcbfa5..03bd2264551 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -57,7 +57,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr pool.emplace(createMySQLPoolWithFailover(*configuration, mysql_settings)); } -ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StorageMySQL::getTableStructureFromData(*pool, configuration->database, configuration->table, context); } @@ -66,7 +66,8 @@ StoragePtr TableFunctionMySQL::executeImpl( const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), diff --git a/src/TableFunctions/TableFunctionMySQL.h b/src/TableFunctions/TableFunctionMySQL.h index 5a230530bc4..04f619f5f4b 100644 --- a/src/TableFunctions/TableFunctionMySQL.h +++ b/src/TableFunctions/TableFunctionMySQL.h @@ -23,10 +23,10 @@ public: return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MySQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable std::optional pool; diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index d25b9e15aa7..57911e16d4b 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -32,14 +32,14 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); } -ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure != "auto") return parseColumnsListFromString(structure, context); return default_structure; } -StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/TableFunctionNull.h b/src/TableFunctions/TableFunctionNull.h index 4fece9e6da9..e80552d4cff 100644 --- a/src/TableFunctions/TableFunctionNull.h +++ b/src/TableFunctions/TableFunctionNull.h @@ -23,11 +23,11 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Null"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; String structure = "auto"; ColumnsDescription structure_hint; diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index ba7a4dc4b36..d6cf50bc7d6 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -23,14 +23,14 @@ namespace ErrorCodes template -ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"number", std::make_shared()}}}; } template -StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionNumbers.h b/src/TableFunctions/TableFunctionNumbers.h index 0a2f3eb863e..e380f40f7b2 100644 --- a/src/TableFunctions/TableFunctionNumbers.h +++ b/src/TableFunctions/TableFunctionNumbers.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemNumbers"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 2b17a1b2c2b..322e0df7c15 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto result = std::make_shared( StorageID(getDatabaseName(), table_name), @@ -38,7 +38,7 @@ StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StoragePostgreSQL::getTableStructureFromData(connection_pool, configuration->table, configuration->schema, context); } diff --git a/src/TableFunctions/TableFunctionPostgreSQL.h b/src/TableFunctions/TableFunctionPostgreSQL.h index 9f10e1c180e..f7d77567dd4 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.h +++ b/src/TableFunctions/TableFunctionPostgreSQL.h @@ -20,11 +20,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "PostgreSQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; postgres::PoolWithFailoverPtr connection_pool; diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index ec659ae61e0..0b7433845b4 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -25,9 +25,9 @@ namespace ErrorCodes } StoragePtr TableFunctionRedis::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); StorageInMemoryMetadata metadata; metadata.setColumns(columns); @@ -39,7 +39,7 @@ StoragePtr TableFunctionRedis::executeImpl( return storage; } -ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionRedis.h b/src/TableFunctions/TableFunctionRedis.h index b985a89e3d7..a7fc0df0a15 100644 --- a/src/TableFunctions/TableFunctionRedis.h +++ b/src/TableFunctions/TableFunctionRedis.h @@ -19,11 +19,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const String & table_name, ColumnsDescription cached_columns) const override; + const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Redis"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; RedisConfiguration configuration; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index e6d72ddf17b..1f42ce4ba30 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -37,7 +37,10 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr String cluster_name; String cluster_description; - String database, table, username = "default", password; + String database = "system"; + String table = "one"; /// The table containing one row is used by default for queries without explicit table specification. + String username = "default"; + String password; if (args_func.size() != 1) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -86,7 +89,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr else { /// Supported signatures: - /// + /// remote('addresses_expr') /// remote('addresses_expr', db.table) /// remote('addresses_expr', 'db', 'table') /// remote('addresses_expr', db.table, 'user') @@ -102,6 +105,8 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// remoteSecure() - same as remote() /// + /// cluster() + /// cluster('cluster_name') /// cluster('cluster_name', db.table) /// cluster('cluster_name', 'db', 'table') /// cluster('cluster_name', db.table, sharding_key) @@ -109,7 +114,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// clusterAllReplicas() - same as cluster() - if (args.size() < 2 || args.size() > max_args) + if ((!is_cluster_function && args.empty()) || args.size() > max_args) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); size_t arg_num = 0; @@ -128,8 +133,15 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (is_cluster_function) { - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + if (!args.empty()) + { + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + } + else + { + cluster_name = "default"; + } } else { @@ -141,44 +153,49 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr } ++arg_num; - const auto * function = args[arg_num]->as(); - if (function && TableFunctionFactory::instance().isTableFunctionName(function->name)) - { - remote_table_function_ptr = args[arg_num]; - ++arg_num; - } - else - { - args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); - database = checkAndGetLiteralArgument(args[arg_num], "database"); - ++arg_num; - - auto qualified_name = QualifiedTableName::parseFromString(database); - if (qualified_name.database.empty()) + /// Names of database and table is not necessary. + if (arg_num < args.size()) + { + const auto * function = args[arg_num]->as(); + if (function && TableFunctionFactory::instance().isTableFunctionName(function->name)) { - if (arg_num >= args.size()) + remote_table_function_ptr = args[arg_num]; + ++arg_num; + } + else + { + args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); + database = checkAndGetLiteralArgument(args[arg_num], "database"); + + ++arg_num; + + auto qualified_name = QualifiedTableName::parseFromString(database); + if (qualified_name.database.empty()) { - throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arg_num >= args.size()) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table name was not found in function arguments. {}", static_cast(help_message)); + } + else + { + std::swap(qualified_name.database, qualified_name.table); + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + ++arg_num; + } } - else + + database = std::move(qualified_name.database); + table = std::move(qualified_name.table); + + /// Cluster function may have sharding key for insert + if (is_cluster_function && arg_num < args.size()) { - std::swap(qualified_name.database, qualified_name.table); - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + sharding_key = args[arg_num]; ++arg_num; } } - - database = std::move(qualified_name.database); - table = std::move(qualified_name.table); - - /// Cluster function may have sharding key for insert - if (is_cluster_function && arg_num < args.size()) - { - sharding_key = args[arg_num]; - ++arg_num; - } } /// Username and password parameters are prohibited in cluster version of the function @@ -203,15 +220,19 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr ++arg_num; } - if (arg_num < args.size() && !sharding_key) + if (arg_num < args.size()) { + if (sharding_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arguments `user` and `password` should be string literals (in single quotes)"); sharding_key = args[arg_num]; ++arg_num; } } if (arg_num < args.size()) + { throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } } if (!cluster_name.empty()) @@ -276,12 +297,12 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr remote_table_id.table_name = table; } -StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const +StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { /// StorageDistributed supports mismatching structure of remote table, so we can use outdated structure for CREATE ... AS remote(...) /// without additional conversion in StorageTableFunctionProxy if (cached_columns.empty()) - cached_columns = getActualTableStructure(context); + cached_columns = getActualTableStructure(context, is_insert_query); assert(cluster); StoragePtr res = remote_table_function_ptr @@ -318,7 +339,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con return res; } -ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(cluster); return getStructureOfRemoteTable(*cluster, remote_table_id, context, remote_table_function_ptr); @@ -329,11 +350,13 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ { is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); help_message = PreformattedMessage::create( - "Table function '{}' requires from 2 to {} parameters: " - ", , {}", + "Table function '{}' requires from {} to {} parameters: " + "{}", name, + is_cluster_function ? 0 : 1, is_cluster_function ? 4 : 6, - is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]"); + is_cluster_function ? "[, , ] [, sharding_key]" + : " [, , ] [, username[, password], sharding_key]"); } void registerTableFunctionRemote(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 32039d1e6a8..0f75bf2b854 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -22,13 +22,13 @@ public: std::string getName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool needStructureConversion() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Distributed"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 84bd93b0009..0ae3a19dc7f 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -313,7 +313,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } } -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (configuration.structure == "auto") { @@ -330,7 +330,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns() return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); } -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { S3::URI s3_uri (configuration.url); diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index d308f469236..ac932260792 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -64,11 +64,12 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageS3::Configuration configuration; diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 1d93132c411..ce96f7f580b 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -17,7 +17,7 @@ namespace DB StoragePtr TableFunctionS3Cluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { StoragePtr storage; ColumnsDescription columns; diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 459ff144f02..4fe25079cf4 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -52,7 +52,8 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3Cluster"; } }; diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index a9831363bd9..27e6fcf1fd1 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), sqlite_db, @@ -42,7 +42,7 @@ StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageSQLite::getTableStructureFromData(sqlite_db, remote_table_name); } diff --git a/src/TableFunctions/TableFunctionSQLite.h b/src/TableFunctions/TableFunctionSQLite.h index fded5646b39..74318f058a9 100644 --- a/src/TableFunctions/TableFunctionSQLite.h +++ b/src/TableFunctions/TableFunctionSQLite.h @@ -18,11 +18,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SQLite"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String database_path, remote_table_name; diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 4ed204a2af3..8d5a023fc3b 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -118,7 +118,7 @@ StoragePtr TableFunctionURL::getStorage( configuration.http_method); } -ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 021eb71df53..5e58a36dde9 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -32,7 +32,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp index c94943db758..a2949278155 100644 --- a/src/TableFunctions/TableFunctionURLCluster.cpp +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -38,7 +38,7 @@ StoragePtr TableFunctionURLCluster::getStorage( format, compression_method, StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, /* is_insert_query */ true), ConstraintsDescription{}, configuration, structure != "auto"); diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index cf0e20c624c..42a19874704 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -119,14 +119,14 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr structure = ColumnsDescription(names_and_types); } -ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { return structure; } -StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block sample_block; for (const auto & name_type : columns.getOrdinary()) diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index 61ce5158086..7c87bff835e 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -14,10 +14,10 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context); diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 6b50e7e0611..2a50fb2d006 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -41,7 +41,7 @@ void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr / throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires a query argument.", getName()); } -ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(create.select); assert(create.children.size() == 1); @@ -58,9 +58,9 @@ ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context } StoragePtr TableFunctionView::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), create, columns, ""); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionView.h b/src/TableFunctions/TableFunctionView.h index bbf072655ed..c679a1f315d 100644 --- a/src/TableFunctions/TableFunctionView.h +++ b/src/TableFunctions/TableFunctionView.h @@ -21,7 +21,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "View"; } @@ -29,7 +29,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; ASTCreateQuery create; }; diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.cpp b/src/TableFunctions/TableFunctionViewIfPermitted.cpp index 12762e24f7e..d7944df1b28 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.cpp +++ b/src/TableFunctions/TableFunctionViewIfPermitted.cpp @@ -55,16 +55,16 @@ void TableFunctionViewIfPermitted::parseArguments(const ASTPtr & ast_function, C else_table_function = TableFunctionFactory::instance().get(else_ast, context); } -ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context, bool is_insert_query) const { - return else_table_function->getActualTableStructure(context); + return else_table_function->getActualTableStructure(context, is_insert_query); } StoragePtr TableFunctionViewIfPermitted::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */) const + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */, bool is_insert_query) const { StoragePtr storage; - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); if (isPermitted(context, columns)) { diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.h b/src/TableFunctions/TableFunctionViewIfPermitted.h index 9fdb34f30ab..bee4e15bfa5 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.h +++ b/src/TableFunctions/TableFunctionViewIfPermitted.h @@ -20,7 +20,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "ViewIfPermitted"; } @@ -28,7 +28,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool isPermitted(const ContextPtr & context, const ColumnsDescription & else_columns) const; diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index 3c487362e1f..eb93626590e 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp +++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -20,14 +20,14 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; template -ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"zero", std::make_shared()}}}; } template -StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionZeros.h b/src/TableFunctions/TableFunctionZeros.h index eef1577673e..07d523ee37c 100644 --- a/src/TableFunctions/TableFunctionZeros.h +++ b/src/TableFunctions/TableFunctionZeros.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemZeros"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index eb6e0372223..6dd29551f48 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -75,6 +75,7 @@ void registerTableFunctions() #if USE_AZURE_BLOB_STORAGE registerTableFunctionAzureBlobStorage(factory); + registerTableFunctionAzureBlobStorageCluster(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 758e193e88f..db212698e65 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -72,6 +72,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory); #if USE_AZURE_BLOB_STORAGE void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); #endif void registerTableFunctions(); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 5529e2f2f39..9f038f10fcd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -162,5 +162,8 @@ endif () if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() +if (TARGET ch_contrib::libarchive) + set(USE_LIBARCHIVE 1) +endif() set(SOURCE_DIR ${CMAKE_SOURCE_DIR}) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 02f70c8a6df..20ea31efa70 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,44 +1,15 @@ test_access_for_functions/test.py::test_access_rights_for_function -test_backward_compatibility/test_normalized_count_comparison.py::test_select_aggregate_alias_column test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_ddl/test.py::test_default_database[configs] test_distributed_ddl/test.py::test_default_database[configs_secure] test_distributed_ddl/test.py::test_on_server_fail[configs] test_distributed_ddl/test.py::test_on_server_fail[configs_secure] test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo] test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors test_distributed_load_balancing/test.py::test_load_balancing_default test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority] test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority_negative] test_distributed_load_balancing/test.py::test_load_balancing_round_robin -test_backward_compatibility/test.py::test_backward_compatability1 -test_backward_compatibility/test_aggregate_fixed_key.py::test_two_level_merge -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000] -test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000] -test_backward_compatibility/test_ip_types_binary_compatibility.py::test_ip_types_binary_compatibility -test_backward_compatibility/test_select_aggregate_alias_column.py::test_select_aggregate_alias_column -test_backward_compatibility/test_short_strings_aggregation.py::test_backward_compatability test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed @@ -87,7 +58,6 @@ test_row_policy/test.py::test_users_xml_is_readonly test_row_policy/test.py::test_with_prewhere test_row_policy/test.py::test_with_prewhere test_settings_constraints_distributed/test.py::test_select_clamps_settings -test_backward_compatibility/test_cte_distributed.py::test_cte_distributed test_compression_codec_read/test.py::test_default_codec_read test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary test_distributed_type_object/test.py::test_distributed_type_object @@ -98,9 +68,6 @@ test_storage_postgresql/test.py::test_postgres_select_insert test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view test_system_merges/test.py::test_mutation_simple[] test_system_merges/test.py::test_mutation_simple[replicated] -test_backward_compatibility/test_insert_profile_events.py::test_new_client_compatible -test_backward_compatibility/test_insert_profile_events.py::test_old_client_compatible -test_backward_compatibility/test_vertical_merges_from_compact_parts.py::test_vertical_merges_from_compact_parts test_disk_over_web_server/test.py::test_cache[node2] test_disk_over_web_server/test.py::test_incorrect_usage test_disk_over_web_server/test.py::test_replicated_database @@ -108,96 +75,23 @@ test_disk_over_web_server/test.py::test_unavailable_server test_disk_over_web_server/test.py::test_usage[node2] test_distributed_backward_compatability/test.py::test_distributed_in_tuple test_executable_table_function/test.py::test_executable_function_input_python -test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_different_version_nodes -test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_distributed_table test_settings_profile/test.py::test_show_profiles test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster -test_backward_compatibility/test_functions.py::test_aggregate_states -test_backward_compatibility/test_functions.py::test_string_functions -test_default_compression_codec/test.py::test_default_codec_for_compact_parts -test_default_compression_codec/test.py::test_default_codec_multiple -test_default_compression_codec/test.py::test_default_codec_single -test_default_compression_codec/test.py::test_default_codec_version_update test_postgresql_protocol/test.py::test_python_client -test_quota/test.py::test_add_remove_interval -test_quota/test.py::test_add_remove_quota -test_quota/test.py::test_consumption_of_show_clusters -test_quota/test.py::test_consumption_of_show_databases -test_quota/test.py::test_consumption_of_show_privileges -test_quota/test.py::test_consumption_of_show_processlist -test_quota/test.py::test_consumption_of_show_tables -test_quota/test.py::test_dcl_introspection -test_quota/test.py::test_dcl_management -test_quota/test.py::test_exceed_quota -test_quota/test.py::test_query_inserts -test_quota/test.py::test_quota_from_users_xml -test_quota/test.py::test_reload_users_xml_by_timer -test_quota/test.py::test_simpliest_quota -test_quota/test.py::test_tracking_quota -test_quota/test.py::test_users_xml_is_readonly -test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibility -test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_wide-Wide] -test_old_versions/test.py::test_client_is_older_than_server -test_polymorphic_parts/test.py::test_polymorphic_parts_non_adaptive -test_old_versions/test.py::test_server_is_older_than_client -test_polymorphic_parts/test.py::test_compact_parts_only -test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_compact-Compact] -test_polymorphic_parts/test.py::test_polymorphic_parts_index -test_old_versions/test.py::test_distributed_query_initiator_is_older_than_shard -test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node1-second_node1] -test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node0-second_node0] -test_ttl_replicated/test.py::test_ttl_table[DELETE] -test_ttl_replicated/test.py::test_ttl_columns -test_ttl_replicated/test.py::test_ttl_compatibility[node_left2-node_right2-2] -test_ttl_replicated/test.py::test_ttl_table[] -test_version_update/test.py::test_aggregate_function_versioning_server_upgrade -test_version_update/test.py::test_aggregate_function_versioning_fetch_data_from_old_to_new_server -test_ttl_replicated/test.py::test_ttl_double_delete_rule_returns_error -test_ttl_replicated/test.py::test_ttl_alter_delete[test_ttl_alter_delete] -test_ttl_replicated/test.py::test_ttl_alter_delete[test_ttl_alter_delete_replicated] -test_ttl_replicated/test.py::test_ttl_compatibility[node_left0-node_right0-0] -test_version_update/test.py::test_modulo_partition_key_issue_23508 -test_ttl_replicated/test.py::test_ttl_many_columns -test_ttl_replicated/test.py::test_modify_column_ttl -test_ttl_replicated/test.py::test_merge_with_ttl_timeout -test_ttl_replicated/test.py::test_ttl_empty_parts -test_ttl_replicated/test.py::test_ttl_compatibility[node_left1-node_right1-1] -test_version_update/test.py::test_aggregate_function_versioning_persisting_metadata -test_version_update/test.py::test_aggregate_function_versioning_issue_16587 -test_ttl_replicated/test.py::test_modify_ttl test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_profile_events_s3/test.py::test_profile_events -test_version_update_after_mutation/test.py::test_upgrade_while_mutation -test_version_update_after_mutation/test.py::test_mutate_and_upgrade -test_system_flush_logs/test.py::test_system_logs[system.text_log-0] test_user_defined_object_persistence/test.py::test_persistence test_settings_profile/test.py::test_show_profiles test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster test_select_access_rights/test_main.py::test_alias_columns test_select_access_rights/test_main.py::test_select_count test_select_access_rights/test_main.py::test_select_join -test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibility test_postgresql_protocol/test.py::test_python_client -test_quota/test.py::test_add_remove_interval -test_quota/test.py::test_add_remove_quota -test_quota/test.py::test_consumption_of_show_clusters -test_quota/test.py::test_consumption_of_show_databases -test_quota/test.py::test_consumption_of_show_privileges -test_quota/test.py::test_consumption_of_show_processlist -test_quota/test.py::test_consumption_of_show_tables -test_quota/test.py::test_dcl_introspection -test_quota/test.py::test_dcl_management -test_quota/test.py::test_exceed_quota -test_quota/test.py::test_query_inserts -test_quota/test.py::test_quota_from_users_xml -test_quota/test.py::test_reload_users_xml_by_timer -test_quota/test.py::test_simpliest_quota -test_quota/test.py::test_tracking_quota -test_quota/test.py::test_users_xml_is_readonly test_replicating_constants/test.py::test_different_versions test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node] +test_wrong_db_or_table_name/test.py::test_wrong_table_name test_drop_is_lock_free/test.py::test_query_is_lock_free[detach table] -test_backward_compatibility/test_data_skipping_indices.py::test_index -test_backward_compatibility/test_convert_ordinary.py::test_convert_ordinary_to_atomic -test_backward_compatibility/test_memory_bound_aggregation.py::test_backward_compatability test_odbc_interaction/test.py::test_postgres_insert +test_zookeeper_config/test.py::test_chroot_with_different_root +test_zookeeper_config/test.py::test_chroot_with_same_root +test_merge_tree_azure_blob_storage/test.py::test_table_manipulations diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index c8f2bb9f43d..15d46403da9 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -2,12 +2,10 @@ 00562_in_subquery_merge_tree 00593_union_all_assert_columns_removed 00673_subquery_prepared_set_performance -00700_decimal_compare 00717_merge_and_distributed 00725_memory_tracking 00754_distributed_optimize_skip_select_on_unused_shards 00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere -00838_unique_index 00927_asof_joins 00940_order_by_read_in_order_query_plan 00945_bloom_filter_index @@ -89,9 +87,7 @@ 02382_join_and_filtering_set 02402_merge_engine_with_view 02404_memory_bound_merging -02421_decimal_in_precision_issue_41125 02426_orc_bug -02428_decimal_in_floating_point_literal 02428_parameterized_view 02458_use_structure_from_insertion_table 02479_race_condition_between_insert_and_droppin_mv diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 514aaf7e2ac..a8c7f2c3a9c 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -8,7 +8,11 @@ import sys from github import Github from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from clickhouse_helper import ( + ClickHouseHelper, + prepare_tests_results_for_clickhouse, + get_instance_type, +) from commit_status_helper import ( RerunHelper, format_description, @@ -17,7 +21,6 @@ from commit_status_helper import ( ) from docker_pull_helper import get_image_with_version from env_helper import ( - GITHUB_RUN_URL, REPORTS_PATH, TEMP_PATH, ) @@ -26,19 +29,37 @@ from pr_info import PRInfo from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/fuzzer" -def get_run_command(pr_number, sha, download_url, workspace_path, image): +def get_run_command( + check_start_time, check_name, pr_number, sha, download_url, workspace_path, image +): + instance_type = get_instance_type() + + envs = [ + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e PR_TO_TEST={pr_number}", + f"-e SHA_TO_TEST={sha}", + f"-e BINARY_URL_TO_DOWNLOAD='{download_url}'", + ] + + env_str = " ".join(envs) + return ( f"docker run " # For sysctl "--privileged " "--network=host " f"--volume={workspace_path}:/workspace " + f"{env_str} " "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " - f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' f"{image}" ) @@ -79,7 +100,7 @@ def main(): build_url = url break else: - raise Exception("Cannot binary clickhouse among build results") + raise Exception("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) @@ -88,11 +109,19 @@ def main(): os.makedirs(workspace_path) run_command = get_run_command( - pr_info.number, pr_info.sha, build_url, workspace_path, docker_image + stopwatch.start_time_str, + check_name, + pr_info.number, + pr_info.sha, + build_url, + workspace_path, + docker_image, ) logging.info("Going to run %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + main_log_path = os.path.join(workspace_path, "main.log") + with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log @@ -105,20 +134,47 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{main_log_path}'", + shell=True, + ) + check_name_lower = ( check_name.lower().replace("(", "").replace(")", "").replace(" ", "") ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { "run.log": run_log_path, - "main.log": os.path.join(workspace_path, "main.log"), - "server.log.zst": os.path.join(workspace_path, "server.log.zst"), + "main.log": main_log_path, "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"), "report.html": os.path.join(workspace_path, "report.html"), "core.zst": os.path.join(workspace_path, "core.zst"), "dmesg.log": os.path.join(workspace_path, "dmesg.log"), } + compressed_server_log_path = os.path.join(workspace_path, "server.log.zst") + if os.path.exists(compressed_server_log_path): + paths["server.log.zst"] = compressed_server_log_path + + # The script can fail before the invocation of `zstd`, but we are still interested in its log: + + not_compressed_server_log_path = os.path.join(workspace_path, "server.log") + if os.path.exists(not_compressed_server_log_path): + paths["server.log"] = not_compressed_server_log_path + s3_helper = S3Helper() for f in paths: try: @@ -127,10 +183,6 @@ def main(): logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = GITHUB_RUN_URL - if paths["report.html"]: - report_url = paths["report.html"] - # Try to get status message saved by the fuzzer try: with open( @@ -152,6 +204,19 @@ def main(): if "fail" in status: test_result.status = "FAIL" + if paths["report.html"]: + report_url = paths["report.html"] + else: + report_url = upload_results( + s3_helper, + pr_info.number, + pr_info.sha, + [test_result], + [], + check_name, + [url for url in paths.values() if url], + ) + ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 9e44af5296c..3e853c0ce41 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -1,20 +1,16 @@ #!/usr/bin/env python3 +from pathlib import Path +from typing import List, Tuple import subprocess import logging import json import os import sys import time -from typing import List, Tuple from ci_config import CI_CONFIG, BuildConfig -from commit_status_helper import ( - NotSet, - get_commit_filtered_statuses, - get_commit, - post_commit_status, -) +from ccache_utils import CargoCache from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_JOB, @@ -24,20 +20,19 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from github_helper import GitHub +from git_helper import Git, git_runner from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, - Git, get_version_from_repo, update_version_local, ) from clickhouse_helper import ( ClickHouseHelper, prepare_tests_results_for_clickhouse, + get_instance_type, ) from stopwatch import Stopwatch @@ -46,11 +41,11 @@ BUILD_LOG_NAME = "build_log.log" def _can_export_binaries(build_config: BuildConfig) -> bool: - if build_config["package_type"] != "deb": + if build_config.package_type != "deb": return False - if build_config["sanitizer"] != "": + if build_config.sanitizer != "": return True - if build_config["build_type"] != "": + if build_config.debug_build: return True return False @@ -58,34 +53,37 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: def get_packager_cmd( build_config: BuildConfig, packager_path: str, - output_path: str, + output_path: Path, + cargo_cache_dir: Path, build_version: str, image_version: str, official: bool, ) -> str: - package_type = build_config["package_type"] - comp = build_config["compiler"] + package_type = build_config.package_type + comp = build_config.compiler cmake_flags = "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=1" cmd = ( - f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager --output-dir={output_path} " - f"--package-type={package_type} --compiler={comp}" + f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager " + f"--output-dir={output_path} --package-type={package_type} --compiler={comp}" ) - if build_config["build_type"]: - cmd += f" --build-type={build_config['build_type']}" - if build_config["sanitizer"]: - cmd += f" --sanitizer={build_config['sanitizer']}" - if build_config["tidy"] == "enable": + if build_config.debug_build: + cmd += " --debug-build" + if build_config.sanitizer: + cmd += f" --sanitizer={build_config.sanitizer}" + if build_config.tidy: cmd += " --clang-tidy" cmd += " --cache=sccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" + cmd += f" --cargo-cache-dir={cargo_cache_dir}" - if "additional_pkgs" in build_config and build_config["additional_pkgs"]: + if build_config.additional_pkgs: cmd += " --additional-pkgs" cmd += f" --docker-image-version={image_version}" + cmd += " --with-profiler" cmd += f" --version={build_version}" if _can_export_binaries(build_config): @@ -98,13 +96,13 @@ def get_packager_cmd( def build_clickhouse( - packager_cmd: str, logs_path: str, build_output_path: str -) -> Tuple[str, bool]: - build_log_path = os.path.join(logs_path, BUILD_LOG_NAME) + packager_cmd: str, logs_path: Path, build_output_path: Path +) -> Tuple[Path, bool]: + build_log_path = logs_path / BUILD_LOG_NAME success = False with TeePopen(packager_cmd, build_log_path) as process: retcode = process.wait() - if os.path.exists(build_output_path): + if build_output_path.exists(): build_results = os.listdir(build_output_path) else: build_results = [] @@ -188,7 +186,7 @@ def create_json_artifact( result = { "log_url": log_url, "build_urls": build_urls, - "build_config": build_config, + "build_config": build_config.__dict__, "elapsed_seconds": elapsed, "status": success, "job_name": GITHUB_JOB, @@ -225,10 +223,10 @@ def upload_master_static_binaries( pr_info: PRInfo, build_config: BuildConfig, s3_helper: S3Helper, - build_output_path: str, + build_output_path: Path, ) -> None: """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.get("static_binary_name", False) + static_binary_name = build_config.static_binary_name if pr_info.number != 0: return elif not static_binary_name: @@ -237,49 +235,21 @@ def upload_master_static_binaries( return s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) - binary = os.path.join(build_output_path, "clickhouse") + binary = build_output_path / "clickhouse" url = s3_helper.upload_build_file_to_s3(binary, s3_path) print(f"::notice ::Binary static URL: {url}") -def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) -> None: - try: - gh = GitHub(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - statuses = get_commit_filtered_statuses(commit) - report_status = [ - name - for name, builds in CI_CONFIG["builds_report_config"].items() - if build_name in builds - ][0] - for status in statuses: - if status.context == report_status and status.state in ["failure", "error"]: - logging.info( - "Commit already have failed status for '%s', setting it to 'pending'", - report_status, - ) - post_commit_status( - commit, - "pending", - status.target_url or NotSet, - "Set to pending on rerun", - report_status, - pr_info, - ) - except: # we do not care about any exception here - logging.info("Failed to get or mark the reports status as pending, continue") - - def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() build_name = sys.argv[1] - build_config = CI_CONFIG["build_config"][build_name] + build_config = CI_CONFIG.build_config[build_name] - if not os.path.exists(TEMP_PATH): - os.makedirs(TEMP_PATH) + temp_path = Path(TEMP_PATH) + os.makedirs(temp_path, exist_ok=True) pr_info = PRInfo() @@ -300,17 +270,12 @@ def main(): # put them as github actions artifact (result) check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) - # If it's a latter running, we need to mark possible failed status - mark_failed_reports_pending(build_name, pr_info) - docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = docker_image.version logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 - if "official" in build_config: - official_flag = build_config["official"] version_type = "testing" if "release" in pr_info.labels or "release-lts" in pr_info.labels: @@ -323,14 +288,18 @@ def main(): logging.info("Build short name %s", build_name) - build_output_path = os.path.join(TEMP_PATH, build_name) - if not os.path.exists(build_output_path): - os.makedirs(build_output_path) + build_output_path = temp_path / build_name + os.makedirs(build_output_path, exist_ok=True) + cargo_cache = CargoCache( + temp_path / "cargo_cache" / "registry", temp_path, s3_helper + ) + cargo_cache.download() packager_cmd = get_packager_cmd( build_config, os.path.join(REPO_COPY, "docker/packager"), build_output_path, + cargo_cache.directory, version.string, image_version, official_flag, @@ -338,9 +307,8 @@ def main(): logging.info("Going to run packager with %s", packager_cmd) - logs_path = os.path.join(TEMP_PATH, "build_log") - if not os.path.exists(logs_path): - os.makedirs(logs_path) + logs_path = temp_path / "build_log" + os.makedirs(logs_path, exist_ok=True) start = time.time() log_path, success = build_clickhouse(packager_cmd, logs_path, build_output_path) @@ -349,6 +317,9 @@ def main(): f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True ) logging.info("Build finished with %s, log path %s", success, log_path) + if success: + cargo_cache.upload() + if not success: # We check if docker works, because if it's down, it's infrastructure try: @@ -361,8 +332,8 @@ def main(): # FIXME performance performance_urls = [] - performance_path = os.path.join(build_output_path, "performance.tar.zst") - if os.path.exists(performance_path): + performance_path = build_output_path / "performance.tar.zst" + if performance_path.exists(): performance_urls.append( s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) ) @@ -385,9 +356,9 @@ def main(): print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - if os.path.exists(log_path): + if log_path.exists(): log_url = s3_helper.upload_build_file_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + log_path, s3_path_prefix + "/" + log_path.name ) logging.info("Log url %s", log_url) else: @@ -401,7 +372,79 @@ def main(): upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) + # Upload profile data ch_helper = ClickHouseHelper() + + clickhouse_ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "") + if clickhouse_ci_logs_host: + instance_type = get_instance_type() + query = f"""INSERT INTO build_time_trace +( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + file, + library, + time, + pid, + tid, + ph, + ts, + dur, + cat, + name, + detail, + count, + avgMs, + args_name +) +SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', * +FROM input(' + file String, + library String, + time DateTime64(6), + pid UInt32, + tid UInt32, + ph String, + ts UInt64, + dur UInt64, + cat String, + name String, + detail String, + count UInt64, + avgMs UInt64, + args_name String') +FORMAT JSONCompactEachRow""" + + auth = { + "X-ClickHouse-User": "ci", + "X-ClickHouse-Key": os.getenv("CLICKHOUSE_CI_LOGS_PASSWORD", ""), + } + url = f"https://{clickhouse_ci_logs_host}/" + profiles_dir = temp_path / "profiles_source" + os.makedirs(profiles_dir, exist_ok=True) + logging.info("Processing profile JSON files from {GIT_REPO_ROOT}/build_docker") + git_runner( + "./utils/prepare-time-trace/prepare-time-trace.sh " + f"build_docker {profiles_dir.absolute()}" + ) + profile_data_file = temp_path / "profile.json" + with open(profile_data_file, "wb") as profile_fd: + for profile_sourse in os.listdir(profiles_dir): + with open(profiles_dir / profile_sourse, "rb") as ps_fd: + profile_fd.write(ps_fd.read()) + + logging.info( + "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", + profile_data_file, + profile_data_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, profile_data_file) + + # Upload statistics to CI database prepared_events = prepare_tests_results_for_clickhouse( pr_info, [], diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 47c11ee0911..a6fda749494 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -16,6 +16,10 @@ from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 +class DownloadException(Exception): + pass + + def get_with_retries( url: str, retries: int = DOWNLOAD_RETRIES_COUNT, @@ -91,7 +95,7 @@ def get_gh_api( def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG["tests_config"][check_name]["required_build"] # type: ignore + return CI_CONFIG.test_configs[check_name].required_build def read_build_urls(build_name: str, reports_path: str) -> List[str]: @@ -149,7 +153,9 @@ def download_build_with_progress(url: str, path: Path) -> None: if os.path.exists(path): os.remove(path) else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") + raise DownloadException( + f"Cannot download dataset from {url}, all retries exceeded" + ) if sys.stdout.isatty(): sys.stdout.write("\n") @@ -174,7 +180,7 @@ def download_builds_filter( print(urls) if not urls: - raise Exception("No build URLs found") + raise DownloadException("No build URLs found") download_builds(result_path, urls, filter_fn) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1362f3c8934..a134cb19346 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -70,7 +70,7 @@ def get_failed_report( message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", - build_type="unknown", + debug_build=False, sanitizer="unknown", status=message, elapsed_seconds=0, @@ -85,7 +85,7 @@ def process_report( build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], - build_type=build_config["build_type"], + debug_build=build_config["debug_build"], sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], @@ -149,7 +149,7 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - builds_for_check = CI_CONFIG["builds_report_config"][build_check_name] + builds_for_check = CI_CONFIG.builds_report_config[build_check_name] required_builds = required_builds or len(builds_for_check) # Collect reports from json artifacts diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 250655ddeb2..8282fb7768a 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -323,7 +323,9 @@ def main(event): if action == "edited": print("PR is edited, check if the body is correct") - error, category = check_pr_description(pull_request["body"]) + error, _ = check_pr_description( + pull_request["body"], pull_request["base"]["repo"]["full_name"] + ) if error: print( f"The PR's body is wrong, is going to comment it. The error is: {error}" diff --git a/tests/ci/ccache_utils.py b/tests/ci/ccache_utils.py index e8522127132..e726bf80b92 100644 --- a/tests/ci/ccache_utils.py +++ b/tests/ci/ccache_utils.py @@ -1,71 +1,31 @@ #!/usr/bin/env python3 import logging -import time -import sys import os import shutil +from hashlib import md5 from pathlib import Path import requests # type: ignore +from build_download_helper import download_build_with_progress, DownloadException from compress_files import decompress_fast, compress_fast from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET +from git_helper import git_runner from s3_helper import S3Helper DOWNLOAD_RETRIES_COUNT = 5 -def dowload_file_with_progress(url, path): - logging.info("Downloading from %s to temp path %s", url, path) - for i in range(DOWNLOAD_RETRIES_COUNT): - try: - with open(path, "wb") as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get("content-length") - if total_length is None or int(total_length) == 0: - logging.info( - "No content-length, will download file without progress" - ) - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - eq_str = "=" * done - space_str = " " * (50 - done) - sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") - sys.stdout.flush() - break - except Exception as ex: - sys.stdout.write("\n") - time.sleep(3) - logging.info("Exception while downloading %s, retry %s", ex, i + 1) - if os.path.exists(path): - os.remove(path) - else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") - - sys.stdout.write("\n") - logging.info("Downloading finished") - - def get_ccache_if_not_exists( - path_to_ccache_dir: str, + path_to_ccache_dir: Path, s3_helper: S3Helper, current_pr_number: int, - temp_path: str, + temp_path: Path, release_pr: int, ) -> int: """returns: number of PR for downloaded PR. -1 if ccache not found""" - ccache_name = os.path.basename(path_to_ccache_dir) + ccache_name = path_to_ccache_dir.name cache_found = False prs_to_check = [current_pr_number] # Release PR is either 0 or defined @@ -94,11 +54,11 @@ def get_ccache_if_not_exists( logging.info("Found ccache on path %s", obj) url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{obj}" - compressed_cache = os.path.join(temp_path, os.path.basename(obj)) - dowload_file_with_progress(url, compressed_cache) + compressed_cache = temp_path / os.path.basename(obj) + download_build_with_progress(url, compressed_cache) - path_to_decompress = str(Path(path_to_ccache_dir).parent) - if not os.path.exists(path_to_decompress): + path_to_decompress = path_to_ccache_dir.parent + if not path_to_decompress.exists(): os.makedirs(path_to_decompress) if os.path.exists(path_to_ccache_dir): @@ -122,15 +82,77 @@ def get_ccache_if_not_exists( return ccache_pr -def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): +def upload_ccache( + path_to_ccache_dir: Path, + s3_helper: S3Helper, + current_pr_number: int, + temp_path: Path, +) -> None: logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number) - ccache_name = os.path.basename(path_to_ccache_dir) - compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.zst") + ccache_name = path_to_ccache_dir.name + compressed_cache_path = temp_path / f"{ccache_name}.tar.zst" compress_fast(path_to_ccache_dir, compressed_cache_path) - s3_path = ( - str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path) - ) + s3_path = f"{current_pr_number}/ccaches/{compressed_cache_path.name}" logging.info("Will upload %s to path %s", compressed_cache_path, s3_path) s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path) logging.info("Upload finished") + + +class CargoCache: + PREFIX = "ccache/cargo_cache" + + def __init__( + self, + directory: Path, + temp_path: Path, + s3_helper: S3Helper, + ): + self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock" + self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest() + self.directory = directory + self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst" + self.temp_path = temp_path + self.s3_helper = s3_helper + self._url = ( + f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{self.PREFIX}/{self.archive_name}" + ) + self._force_upload_cache = False + + def download(self): + logging.info("Searching rust cache for Cargo.lock md5 %s", self.lock_hash) + compressed_cache = self.temp_path / self.archive_name + try: + download_build_with_progress(self._url, compressed_cache) + except DownloadException: + logging.warning("Unable downloading cargo cache, creating empty directory") + self.directory.mkdir(parents=True, exist_ok=True) + return + + # decompress the cache and check if the necessary directory is there + self.directory.parent.mkdir(parents=True, exist_ok=True) + decompress_fast(compressed_cache, self.directory.parent) + if not self.directory.exists(): + logging.warning( + "The cargo cache archive was successfully downloaded and " + "decompressed, but %s does not exitst. Creating empty one", + self.directory, + ) + logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) + self.directory.mkdir(parents=True, exist_ok=True) + + def upload(self): + if not self._force_upload_cache: + cache_response = requests.head(self._url) + if cache_response.status_code == 200: + logging.info( + "Remote cargo cache %s already exist, won't reupload", self._url + ) + return + + logging.info("Compressing cargo cache") + archive_path = self.directory.parent / self.archive_name + compress_fast(self.directory, archive_path) + s3_path = f"{self.PREFIX}/{self.archive_name}" + logging.info("Uploading %s to S3 path %s", archive_path, s3_path) + self.s3_helper.upload_build_file_to_s3(archive_path, s3_path) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 517e40fd2d6..a2f3a3ab401 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,180 +1,189 @@ #!/usr/bin/env python3 +import logging + from dataclasses import dataclass -from typing import Callable, Dict, TypeVar +from typing import Callable, Dict, List, Literal -ConfValue = TypeVar("ConfValue", str, bool) -BuildConfig = Dict[str, ConfValue] -CI_CONFIG = { - "build_config": { - "package_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "amd64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "aarch64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_asan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "address", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_ubsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "undefined", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_tsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "thread", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_msan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "memory", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_debug": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "Note: sparse checkout was used", - }, - "binary_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_tidy": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "debug-amd64", - "tidy": "enable", - "with_coverage": False, - "comment": "clang-tidy is used for static analysis", - }, - "binary_darwin": { - "compiler": "clang-16-darwin", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64_v80compat": { - "compiler": "clang-16-aarch64-v80compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "aarch64v80compat", - "tidy": "disable", - "with_coverage": False, - "comment": "For ARMv8.1 and older", - }, - "binary_freebsd": { - "compiler": "clang-16-freebsd", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "freebsd", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_darwin_aarch64": { - "compiler": "clang-16-darwin-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos-aarch64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_ppc64le": { - "compiler": "clang-16-ppc64le", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "powerpc64le", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_amd64_compat": { - "compiler": "clang-16-amd64-compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "amd64compat", - "tidy": "disable", - "with_coverage": False, - "comment": "SSE2-only build", - }, - "binary_riscv64": { - "compiler": "clang-16-riscv64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "riscv64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, +@dataclass +class BuildConfig: + compiler: str + package_type: Literal["deb", "binary"] + additional_pkgs: bool = False + debug_build: bool = False + sanitizer: str = "" + tidy: bool = False + comment: str = "" + static_binary_name: str = "" + + +@dataclass +class TestConfig: + required_build: str + force_tests: bool = False + + +BuildConfigs = Dict[str, BuildConfig] +BuildsReportConfig = Dict[str, List[str]] +TestConfigs = Dict[str, TestConfig] + + +@dataclass +class CiConfig: + build_config: BuildConfigs + builds_report_config: BuildsReportConfig + test_configs: TestConfigs + + def validate(self) -> None: + errors = [] + # All build configs must belong to build_report_config + for build_name in self.build_config.keys(): + build_in_reports = False + for report_config in self.builds_report_config.values(): + if build_name in report_config: + build_in_reports = True + break + if not build_in_reports: + logging.error( + "Build name %s does not belong to build reports", build_name + ) + errors.append( + f"Build name {build_name} does not belong to build reports" + ) + # And otherwise + for build_report_name, build_names in self.builds_report_config.items(): + missed_names = [ + name for name in build_names if name not in self.build_config.keys() + ] + if missed_names: + logging.error( + "The following names of the build report '%s' " + "are missed in build_config: %s", + build_report_name, + missed_names, + ) + errors.append( + f"The following names of the build report '{build_report_name}' " + f"are missed in build_config: {missed_names}", + ) + # And finally, all of tests' requirements must be in the builds + for test_name, test_config in self.test_configs.items(): + if test_config.required_build not in self.build_config.keys(): + logging.error( + "The requierment '%s' for '%s' is not found in builds", + test_config, + test_name, + ) + errors.append( + f"The requierment '{test_config}' for " + f"'{test_name}' is not found in builds" + ) + + if errors: + raise KeyError("config contains errors", errors) + + +CI_CONFIG = CiConfig( + build_config={ + "package_release": BuildConfig( + compiler="clang-16", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ), + "package_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ), + "package_asan": BuildConfig( + compiler="clang-16", + sanitizer="address", + package_type="deb", + ), + "package_ubsan": BuildConfig( + compiler="clang-16", + sanitizer="undefined", + package_type="deb", + ), + "package_tsan": BuildConfig( + compiler="clang-16", + sanitizer="thread", + package_type="deb", + ), + "package_msan": BuildConfig( + compiler="clang-16", + sanitizer="memory", + package_type="deb", + ), + "package_debug": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="deb", + comment="Note: sparse checkout was used", + ), + "binary_release": BuildConfig( + compiler="clang-16", + package_type="binary", + ), + "binary_tidy": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + "binary_darwin": BuildConfig( + compiler="clang-16-darwin", + package_type="binary", + static_binary_name="macos", + ), + "binary_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="binary", + ), + "binary_aarch64_v80compat": BuildConfig( + compiler="clang-16-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + "binary_freebsd": BuildConfig( + compiler="clang-16-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + "binary_darwin_aarch64": BuildConfig( + compiler="clang-16-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + "binary_ppc64le": BuildConfig( + compiler="clang-16-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + "binary_amd64_compat": BuildConfig( + compiler="clang-16-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + "binary_riscv64": BuildConfig( + compiler="clang-16-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), + "binary_s390x": BuildConfig( + compiler="clang-16-s390x", + package_type="binary", + static_binary_name="s390x", + ), }, - "builds_report_config": { + builds_report_config={ "ClickHouse build check": [ "package_release", "package_aarch64", @@ -194,216 +203,84 @@ CI_CONFIG = { "binary_darwin_aarch64", "binary_ppc64le", "binary_riscv64", + "binary_s390x", "binary_amd64_compat", ], }, - "tests_config": { - # required_build - build name for artifacts - # force_tests - force success status for tests - "Install packages (amd64)": { - "required_build": "package_release", - }, - "Install packages (arm64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (asan)": { - "required_build": "package_asan", - }, - "Stateful tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateful tests (msan)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateful tests (debug)": { - "required_build": "package_debug", - }, - "Stateful tests (release)": { - "required_build": "package_release", - }, - "Stateful tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateful tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, + test_configs={ + "Install packages (amd64)": TestConfig("package_release"), + "Install packages (arm64)": TestConfig("package_aarch64"), + "Stateful tests (asan)": TestConfig("package_asan"), + "Stateful tests (tsan)": TestConfig("package_tsan"), + "Stateful tests (msan)": TestConfig("package_msan"), + "Stateful tests (ubsan)": TestConfig("package_ubsan"), + "Stateful tests (debug)": TestConfig("package_debug"), + "Stateful tests (release)": TestConfig("package_release"), + "Stateful tests (aarch64)": TestConfig("package_aarch64"), + "Stateful tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateful tests (release, DatabaseReplicated)": TestConfig("package_release"), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": { - "required_build": "package_release", - }, - "Stateful tests (debug, ParallelReplicas)": { - "required_build": "package_debug", - }, - "Stateful tests (asan, ParallelReplicas)": { - "required_build": "package_asan", - }, - "Stateful tests (msan, ParallelReplicas)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan, ParallelReplicas)": { - "required_build": "package_ubsan", - }, - "Stateful tests (tsan, ParallelReplicas)": { - "required_build": "package_tsan", - }, + "Stateful tests (release, ParallelReplicas)": TestConfig("package_release"), + "Stateful tests (debug, ParallelReplicas)": TestConfig("package_debug"), + "Stateful tests (asan, ParallelReplicas)": TestConfig("package_asan"), + "Stateful tests (msan, ParallelReplicas)": TestConfig("package_msan"), + "Stateful tests (ubsan, ParallelReplicas)": TestConfig("package_ubsan"), + "Stateful tests (tsan, ParallelReplicas)": TestConfig("package_tsan"), # End stateful tests for parallel replicas - "Stateless tests (asan)": { - "required_build": "package_asan", - }, - "Stateless tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateless tests (msan)": { - "required_build": "package_msan", - }, - "Stateless tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests (debug)": { - "required_build": "package_debug", - }, - "Stateless tests (release)": { - "required_build": "package_release", - }, - "Stateless tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateless tests (release, wide parts enabled)": { - "required_build": "package_release", - }, - "Stateless tests (release, analyzer)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, - "Stateless tests (release, s3 storage)": { - "required_build": "package_release", - }, - "Stateless tests (debug, s3 storage)": { - "required_build": "package_debug", - }, - "Stateless tests (tsan, s3 storage)": { - "required_build": "package_tsan", - }, - "Stress test (asan)": { - "required_build": "package_asan", - }, - "Stress test (tsan)": { - "required_build": "package_tsan", - }, - "Stress test (ubsan)": { - "required_build": "package_ubsan", - }, - "Stress test (msan)": { - "required_build": "package_msan", - }, - "Stress test (debug)": { - "required_build": "package_debug", - }, - "Upgrade check (asan)": { - "required_build": "package_asan", - }, - "Upgrade check (tsan)": { - "required_build": "package_tsan", - }, - "Upgrade check (msan)": { - "required_build": "package_msan", - }, - "Upgrade check (debug)": { - "required_build": "package_debug", - }, - "Integration tests (asan)": { - "required_build": "package_asan", - }, - "Integration tests (asan, analyzer)": { - "required_build": "package_asan", - }, - "Integration tests (tsan)": { - "required_build": "package_tsan", - }, - "Integration tests (release)": { - "required_build": "package_release", - }, - "Integration tests (msan)": { - "required_build": "package_msan", - }, - "Integration tests flaky check (asan)": { - "required_build": "package_asan", - }, - "Compatibility check (amd64)": { - "required_build": "package_release", - }, - "Compatibility check (aarch64)": { - "required_build": "package_aarch64", - }, - "Unit tests (release)": { - "required_build": "binary_release", - }, - "Unit tests (asan)": { - "required_build": "package_asan", - }, - "Unit tests (msan)": { - "required_build": "package_msan", - }, - "Unit tests (tsan)": { - "required_build": "package_tsan", - }, - "Unit tests (ubsan)": { - "required_build": "package_ubsan", - }, - "AST fuzzer (debug)": { - "required_build": "package_debug", - }, - "AST fuzzer (asan)": { - "required_build": "package_asan", - }, - "AST fuzzer (msan)": { - "required_build": "package_msan", - }, - "AST fuzzer (tsan)": { - "required_build": "package_tsan", - }, - "AST fuzzer (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests flaky check (asan)": { - "required_build": "package_asan", - }, - "ClickHouse Keeper Jepsen": { - "required_build": "binary_release", - }, - "ClickHouse Server Jepsen": { - "required_build": "binary_release", - }, - "Performance Comparison": { - "required_build": "package_release", - "test_grep_exclude_filter": "", - }, - "Performance Comparison Aarch64": { - "required_build": "package_aarch64", - "test_grep_exclude_filter": "", - }, - "SQLancer (release)": { - "required_build": "package_release", - }, - "SQLancer (debug)": { - "required_build": "package_debug", - }, - "Sqllogic test (release)": { - "required_build": "package_release", - }, + "Stateless tests (asan)": TestConfig("package_asan"), + "Stateless tests (tsan)": TestConfig("package_tsan"), + "Stateless tests (msan)": TestConfig("package_msan"), + "Stateless tests (ubsan)": TestConfig("package_ubsan"), + "Stateless tests (debug)": TestConfig("package_debug"), + "Stateless tests (release)": TestConfig("package_release"), + "Stateless tests (aarch64)": TestConfig("package_aarch64"), + "Stateless tests (release, wide parts enabled)": TestConfig("package_release"), + "Stateless tests (release, analyzer)": TestConfig("package_release"), + "Stateless tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateless tests (release, DatabaseReplicated)": TestConfig("package_release"), + "Stateless tests (release, s3 storage)": TestConfig("package_release"), + "Stateless tests (debug, s3 storage)": TestConfig("package_debug"), + "Stateless tests (tsan, s3 storage)": TestConfig("package_tsan"), + "Stress test (asan)": TestConfig("package_asan"), + "Stress test (tsan)": TestConfig("package_tsan"), + "Stress test (ubsan)": TestConfig("package_ubsan"), + "Stress test (msan)": TestConfig("package_msan"), + "Stress test (debug)": TestConfig("package_debug"), + "Upgrade check (asan)": TestConfig("package_asan"), + "Upgrade check (tsan)": TestConfig("package_tsan"), + "Upgrade check (msan)": TestConfig("package_msan"), + "Upgrade check (debug)": TestConfig("package_debug"), + "Integration tests (asan)": TestConfig("package_asan"), + "Integration tests (asan, analyzer)": TestConfig("package_asan"), + "Integration tests (tsan)": TestConfig("package_tsan"), + "Integration tests (release)": TestConfig("package_release"), + "Integration tests (msan)": TestConfig("package_msan"), + "Integration tests flaky check (asan)": TestConfig("package_asan"), + "Compatibility check (amd64)": TestConfig("package_release"), + "Compatibility check (aarch64)": TestConfig("package_aarch64"), + "Unit tests (release)": TestConfig("binary_release"), + "Unit tests (asan)": TestConfig("package_asan"), + "Unit tests (msan)": TestConfig("package_msan"), + "Unit tests (tsan)": TestConfig("package_tsan"), + "Unit tests (ubsan)": TestConfig("package_ubsan"), + "AST fuzzer (debug)": TestConfig("package_debug"), + "AST fuzzer (asan)": TestConfig("package_asan"), + "AST fuzzer (msan)": TestConfig("package_msan"), + "AST fuzzer (tsan)": TestConfig("package_tsan"), + "AST fuzzer (ubsan)": TestConfig("package_ubsan"), + "Stateless tests flaky check (asan)": TestConfig("package_asan"), + "ClickHouse Keeper Jepsen": TestConfig("binary_release"), + "ClickHouse Server Jepsen": TestConfig("binary_release"), + "Performance Comparison": TestConfig("package_release"), + "Performance Comparison Aarch64": TestConfig("package_aarch64"), + "SQLancer (release)": TestConfig("package_release"), + "SQLancer (debug)": TestConfig("package_debug"), + "Sqllogic test (release)": TestConfig("package_release"), + "SQLTest": TestConfig("package_release"), }, -} # type: dict +) +CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ @@ -413,99 +290,6 @@ REQUIRED_CHECKS = [ "Fast test", "Stateful tests (release)", "Stateless tests (release)", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) [3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "AST fuzzer (asan)", - "AST fuzzer (msan)", - "AST fuzzer (tsan)", - "AST fuzzer (ubsan)", - "AST fuzzer (debug)", - "Compatibility check (aarch64)", - "Compatibility check (amd64)", - "Install packages (amd64)", - "Install packages (arm64)", - "Integration tests (asan) [1/6]", - "Integration tests (asan) [2/6]", - "Integration tests (asan) [3/6]", - "Integration tests (asan) [4/6]", - "Integration tests (asan) [5/6]", - "Integration tests (asan) [6/6]", - "Integration tests (release) [1/4]", - "Integration tests (release) [2/4]", - "Integration tests (release) [3/4]", - "Integration tests (release) [4/4]", - "Integration tests (tsan) [1/6]", - "Integration tests (tsan) [2/6]", - "Integration tests (tsan) [3/6]", - "Integration tests (tsan) [4/6]", - "Integration tests (tsan) [5/6]", - "Integration tests (tsan) [6/6]", - "Integration tests flaky check (asan)", - "Stateful tests (aarch64)", - "Stateful tests (asan)", - "Stateful tests (asan, ParallelReplicas)", - "Stateful tests (debug)", - "Stateful tests (debug, ParallelReplicas)", - "Stateful tests (msan)", - "Stateful tests (msan, ParallelReplicas)", - "Stateful tests (release, ParallelReplicas)", - "Stateful tests (tsan)", - "Stateful tests (tsan, ParallelReplicas)", - "Stateful tests (ubsan)", - "Stateful tests (ubsan, ParallelReplicas)", - "Stateless tests (aarch64)", - "Stateless tests (asan) [1/4]", - "Stateless tests (asan) [2/4]", - "Stateless tests (asan) [3/4]", - "Stateless tests (asan) [4/4]", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) [3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "Stateless tests (debug, s3 storage) [1/6]", - "Stateless tests (debug, s3 storage) [2/6]", - "Stateless tests (debug, s3 storage) [3/6]", - "Stateless tests (debug, s3 storage) [4/6]", - "Stateless tests (debug, s3 storage) [5/6]", - "Stateless tests (debug, s3 storage) [6/6]", - "Stateless tests (msan) [1/6]", - "Stateless tests (msan) [2/6]", - "Stateless tests (msan) [3/6]", - "Stateless tests (msan) [4/6]", - "Stateless tests (msan) [5/6]", - "Stateless tests (msan) [6/6]", - "Stateless tests (release, DatabaseReplicated) [1/4]", - "Stateless tests (release, DatabaseReplicated) [2/4]", - "Stateless tests (release, DatabaseReplicated) [3/4]", - "Stateless tests (release, DatabaseReplicated) [4/4]", - "Stateless tests (release, s3 storage) [1/2]", - "Stateless tests (release, s3 storage) [2/2]", - "Stateless tests (release, wide parts enabled)", - "Stateless tests (tsan) [1/5]", - "Stateless tests (tsan) [2/5]", - "Stateless tests (tsan) [3/5]", - "Stateless tests (tsan) [4/5]", - "Stateless tests (tsan) [5/5]", - "Stateless tests (tsan, s3 storage) [1/5]", - "Stateless tests (tsan, s3 storage) [2/5]", - "Stateless tests (tsan, s3 storage) [3/5]", - "Stateless tests (tsan, s3 storage) [4/5]", - "Stateless tests (tsan, s3 storage) [5/5]", - "Stateless tests (ubsan) [1/2]", - "Stateless tests (ubsan) [2/2]", - "Stress test (asan)", - "Stress test (debug)", - "Stress test (msan)", - "Stress test (tsan)", - "Stress test (ubsan)", - "Upgrade check (asan)", - "Upgrade check (debug)", - "Upgrade check (msan)", - "Upgrade check (tsan)", "Style Check", "Unit tests (asan)", "Unit tests (msan)", diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 7f98f6e52d2..1ed7bd9ea4d 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -from typing import List +from pathlib import Path +from typing import Dict, List, Optional import json import logging import time @@ -16,30 +17,61 @@ class InsertException(Exception): class ClickHouseHelper: - def __init__(self, url=None): + def __init__( + self, url: Optional[str] = None, auth: Optional[Dict[str, str]] = None + ): if url is None: url = get_parameter_from_ssm("clickhouse-test-stat-url") self.url = url - self.auth = { + self.auth = auth or { "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), } @staticmethod - def _insert_json_str_info_impl(url, auth, db, table, json_str): + def insert_file( + url: str, + auth: Optional[Dict[str, str]], + query: str, + file: Path, + additional_options: Optional[Dict[str, str]] = None, + ) -> None: + params = { + "query": query, + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + if additional_options: + for k, v in additional_options.items(): + params[k] = v + + with open(file, "rb") as data_fd: + ClickHouseHelper._insert_post( + url, params=params, data=data_fd, headers=auth + ) + + @staticmethod + def insert_json_str(url, auth, db, table, json_str): params = { "database": db, "query": f"INSERT INTO {table} FORMAT JSONEachRow", "date_time_input_format": "best_effort", "send_logs_level": "warning", } + ClickHouseHelper._insert_post(url, params=params, data=json_str, headers=auth) + + @staticmethod + def _insert_post(*args, **kwargs): + url = "" + if args: + url = args[0] + url = kwargs.get("url", url) + kwargs["timeout"] = kwargs.get("timeout", 100) for i in range(5): try: - response = requests.post( - url, params=params, data=json_str, headers=auth - ) + response = requests.post(*args, **kwargs) except Exception as e: error = f"Received exception while sending data to {url} on {i} attempt: {e}" logging.warning(error) @@ -51,17 +83,12 @@ class ClickHouseHelper: break error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" + f"Cannot insert data into clickhouse at try {i}: HTTP code " + f"{response.status_code}: '{response.text}'" ) if response.status_code >= 500: - # A retriable error + # A retryable error time.sleep(1) continue @@ -76,7 +103,7 @@ class ClickHouseHelper: raise InsertException(error) def _insert_json_str_info(self, db, table, json_str): - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + self.insert_json_str(self.url, self.auth, db, table, json_str) def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index efe149b0aa4..945bcfe05ed 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -51,7 +51,8 @@ class RerunHelper: def override_status(status: str, check_name: str, invert: bool = False) -> str: - if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False): + test_config = CI_CONFIG.test_configs.get(check_name) + if test_config and test_config.force_tests: return "success" if invert: diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py index 8d52d030b84..d8e691ce3b4 100644 --- a/tests/ci/compress_files.py +++ b/tests/ci/compress_files.py @@ -1,24 +1,31 @@ #!/usr/bin/env python3 import subprocess import logging -import os + +from pathlib import Path +from typing import Optional -def compress_file_fast(path, archive_path): - if archive_path.endswith(".zst"): +PIGZ = Path("/usr/bin/pigz") + + +def compress_file_fast(path: Path, archive_path: Path) -> None: + if archive_path.suffix == ".zst": subprocess.check_call(f"zstd < {path} > {archive_path}", shell=True) - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): subprocess.check_call(f"pigz < {path} > {archive_path}", shell=True) else: subprocess.check_call(f"gzip < {path} > {archive_path}", shell=True) -def compress_fast(path, archive_path, exclude=None): +def compress_fast( + path: Path, archive_path: Path, exclude: Optional[Path] = None +) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info("zstd will be used for compression") program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info("pigz found, will compress and decompress faster") program_part = "--use-compress-program='pigz'" else: @@ -32,27 +39,25 @@ def compress_fast(path, archive_path, exclude=None): else: exclude_part = f"--exclude {exclude}" - fname = os.path.basename(path) - if os.path.isfile(path): - path = os.path.dirname(path) - else: - path += "/.." + fname = path.name - cmd = f"tar {program_part} {exclude_part} -cf {archive_path} -C {path} {fname}" + cmd = ( + f"tar {program_part} {exclude_part} -cf {archive_path} -C {path.parent} {fname}" + ) logging.debug("compress_fast cmd: %s", cmd) subprocess.check_call(cmd, shell=True) -def decompress_fast(archive_path, result_path=None): +def decompress_fast(archive_path: Path, result_path: Optional[Path] = None) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info( "zstd will be used for decompression ('%s' -> '%s')", archive_path, result_path, ) program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info( "pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index fff2975cea4..16a58a90dcf 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,6 @@ import shutil import subprocess import time import sys -from glob import glob from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union @@ -32,17 +31,6 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") ImagesDict = Dict[str, dict] -# workaround for mypy issue [1]: -# -# "Argument 1 to "map" has incompatible type overloaded function" [1] -# -# [1]: https://github.com/python/mypy/issues/9864 -# -# NOTE: simply lambda will do the trick as well, but pylint will not like it -def realpath(*args, **kwargs): - return os.path.realpath(*args, **kwargs) - - class DockerImage: def __init__( self, @@ -123,23 +111,8 @@ def get_changed_docker_images( changed_images = [] for dockerfile_dir, image_description in images_dict.items(): - source_dir = GITHUB_WORKSPACE.rstrip("/") + "/" - dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True) - # resolve symlinks - dockerfile_files = list(map(realpath, dockerfile_files)) - # trim prefix to get relative path again, to match with files_changed - dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files)) - logging.info( - "Docker %s (source_dir=%s) build context for PR %s @ %s: %s", - dockerfile_dir, - source_dir, - pr_info.number, - pr_info.sha, - str(dockerfile_files), - ) - for f in files_changed: - if f in dockerfile_files: + if f.startswith(dockerfile_dir): name = image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( @@ -272,8 +245,6 @@ def build_and_push_one_image( cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}" cmd = ( - # tar is requried to follow symlinks, since docker-build cannot do this - f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | " "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" @@ -283,7 +254,7 @@ def build_and_push_one_image( f"{cache_from} " f"--cache-to type=inline,mode=max " f"{push_arg}" - f"--progress plain -" + f"--progress plain {image.full_path}" ) logging.info("Docker command to run: %s", cmd) with TeePopen(cmd, build_log) as proc: diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index a434d3cc841..89bd7b7755b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -238,7 +238,7 @@ def build_and_push_image( result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" - init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] + init_args = ["docker", "buildx", "build"] if push: init_args.append("--push") init_args.append("--output=type=image,push-by-digest=true") diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index c679ab984ee..14db348ef36 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -18,7 +18,7 @@ import docker_server as ds class TestDockerImageCheck(unittest.TestCase): docker_images_path = os.path.join( - os.path.dirname(__file__), "tests/docker_images.json" + os.path.dirname(__file__), "tests/docker_images_for_tests.json" ) def test_get_changed_docker_images(self): @@ -40,6 +40,12 @@ class TestDockerImageCheck(unittest.TestCase): [ di.DockerImage("docker/test/base", "clickhouse/test-base", False), di.DockerImage("docker/docs/builder", "clickhouse/docs-builder", True), + di.DockerImage( + "docker/test/sqltest", + "clickhouse/sqltest", + False, + "clickhouse/test-base", # type: ignore + ), di.DockerImage( "docker/test/stateless", "clickhouse/stateless-test", @@ -126,13 +132,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --push --progress plain -", + "--cache-to type=inline,mode=max --push --progress plain path", mock_popen.call_args.args, ) self.assertTrue(result) @@ -144,13 +149,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertTrue(result) @@ -162,12 +166,11 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertFalse(result) @@ -181,14 +184,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " "--cache-from type=registry,ref=name:cached-version " "--cache-from type=registry,ref=name:another-cached " - "--cache-to type=inline,mode=max --progress plain -", + "--cache-to type=inline,mode=max --progress plain path", mock_popen.call_args.args, ) self.assertFalse(result) diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index c57780daa36..034e65f204d 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -9,7 +9,7 @@ import os from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI_CONFIG from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -62,9 +62,9 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG["build_config"][build] # type: BuildConfig - if args.rename: - path = temp_path / f"clickhouse-{config['static_binary_name']}" + config = CI_CONFIG.build_config[build] + if args.rename and config.static_binary_name: + path = temp_path / f"clickhouse-{config.static_binary_name}" else: path = temp_path / "clickhouse" diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index aa8a0cf9553..74392947b82 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -8,6 +8,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, + update_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -18,6 +19,8 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) + # Update the Mergeable Check at the final step + update_mergeable_check(gh, pr_info, CI_STATUS_NAME) commit = get_commit(gh, pr_info.sha) statuses = [ @@ -27,7 +30,8 @@ def main(): ] if not statuses: return - status = statuses[0] + # Take the latest status + status = statuses[-1] if status.state == "pending": post_commit_status( commit, diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index b773d1eddd9..2bab330bd66 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -17,6 +17,7 @@ from build_download_helper import download_all_deb_packages from clickhouse_helper import ( ClickHouseHelper, prepare_tests_results_for_clickhouse, + get_instance_type, ) from commit_status_helper import ( NotSet, @@ -73,9 +74,11 @@ def get_image_name(check_name): def get_run_command( + pr_info, + check_start_time, check_name, builds_path, - repo_tests_path, + repo_path, result_path, server_log_path, kill_timeout, @@ -105,16 +108,28 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] + instance_type = get_instance_type() + + envs += [ + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e PULL_REQUEST_NUMBER='{pr_info.number}'", + f"-e COMMIT_SHA='{pr_info.sha}'", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + ] + env_str = " ".join(envs) volume_with_broken_test = ( - f"--volume={repo_tests_path}/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" + f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" if "analyzer" in check_name else "" ) return ( f"docker run --volume={builds_path}:/package_folder " - f"--volume={repo_tests_path}:/usr/share/clickhouse-test " + f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"{volume_with_broken_test} " f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" @@ -306,8 +321,6 @@ def main(): image_name = get_image_name(check_name) docker_image = get_image_with_version(reports_path, image_name) - repo_tests_path = os.path.join(repo_path, "tests") - packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): os.makedirs(packages_path) @@ -334,9 +347,11 @@ def main(): additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( + pr_info, + stopwatch.start_time_str, check_name, packages_path, - repo_tests_path, + repo_path, result_path, server_log_path, kill_timeout, @@ -368,6 +383,23 @@ def main(): ch_helper = ClickHouseHelper() + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + shell=True, + ) + report_url = upload_results( s3_helper, pr_info.number, diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 73e1a6ef739..a5788e2af3f 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -50,8 +50,19 @@ def prepare_test_scripts(): server_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse systemctl start clickhouse-server -clickhouse-client -q 'SELECT version()'""" +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" + initd_test = r"""#!/bin/bash +set -e +trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse +/etc/init.d/clickhouse-server start +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" keeper_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR @@ -102,6 +113,7 @@ chmod a+rw -R /tests_logs exit 1 """ (TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8") + (TEMP_PATH / "initd_test.sh").write_text(initd_test, encoding="utf-8") (TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8") (TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8") (TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8") @@ -112,6 +124,9 @@ def test_install_deb(image: DockerImage) -> TestResults: "Install server deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-{server,client,common}*deb bash -ex /packages/server_test.sh""", + "Run server init.d": r"""#!/bin/bash -ex +apt-get install /packages/clickhouse-{server,client,common}*deb +bash -ex /packages/initd_test.sh""", "Install keeper deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-keeper*deb bash -ex /packages/keeper_test.sh""", @@ -191,18 +206,18 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults: retcode = process.wait() if retcode == 0: status = OK + subprocess.check_call( + f"docker kill -s 9 {container_id}", shell=True + ) break status = FAIL copy2(log_file, LOGS_PATH) archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz" - compress_fast( - LOGS_PATH.as_posix(), - archive_path.as_posix(), - ) + compress_fast(LOGS_PATH, archive_path) logs.append(archive_path) + subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) - subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs)) return test_results @@ -279,7 +294,7 @@ def main(): sys.exit(0) docker_images = { - name: get_image_with_version(REPORTS_PATH, name) + name: get_image_with_version(REPORTS_PATH, name, args.download) for name in (RPM_IMAGE, DEB_IMAGE) } prepare_test_scripts() @@ -296,6 +311,8 @@ def main(): is_match = is_match or path.endswith(".rpm") if args.tgz: is_match = is_match or path.endswith(".tgz") + # We don't need debug packages, so let's filter them out + is_match = is_match and "-dbg" not in path return is_match download_builds_filter( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 222b2197117..d5b70720ee9 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -37,19 +37,20 @@ from upload_result_helper import upload_results # When update, update -# integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too +# tests/integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too IMAGES = [ + "clickhouse/dotnet-client", + "clickhouse/integration-helper", + "clickhouse/integration-test", "clickhouse/integration-tests-runner", + "clickhouse/kerberized-hadoop", + "clickhouse/kerberos-kdc", "clickhouse/mysql-golang-client", "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", - "clickhouse/integration-test", - "clickhouse/kerberos-kdc", - "clickhouse/kerberized-hadoop", - "clickhouse/integration-helper", - "clickhouse/dotnet-client", ] diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index c21fafa2605..54eb70536a8 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -import time +import argparse import logging import os import sys +import time -import argparse +from pathlib import Path import boto3 # type: ignore import requests # type: ignore @@ -268,8 +269,8 @@ if __name__ == "__main__": description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻" compress_fast( - os.path.join(result_path, "store"), - os.path.join(result_path, "jepsen_store.tar.zst"), + Path(result_path) / "store", + Path(result_path) / "jepsen_store.tar.zst", ) additional_data.append(os.path.join(result_path, "jepsen_store.tar.zst")) except Exception as ex: diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index c56994cc86a..aa88342fcc3 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -219,3 +219,12 @@ def list_runners(access_token: str) -> RunnerDescriptions: result.append(desc) return result + + +def cached_value_is_valid(updated_at: float, ttl: float) -> bool: + "a common function to identify if cachable value is still valid" + if updated_at == 0: + return False + if time.time() - ttl < updated_at: + return True + return False diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index ef47eacc082..82fc53b9356 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -101,7 +101,7 @@ LABELS = { CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} -def check_pr_description(pr_body: str) -> Tuple[str, str]: +def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: """The function checks the body to being properly formatted according to .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, then there is an error.""" @@ -109,11 +109,7 @@ def check_pr_description(pr_body: str) -> Tuple[str, str]: lines = [re.sub(r"\s+", " ", line) for line in lines] # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: + if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]: return "", LABELS["pr-not-for-changelog"][0] category = "" diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index d3bf15ab259..6d5653f6a58 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -8,6 +8,8 @@ import boto3 # type: ignore import jwt import requests # type: ignore +from . import cached_value_is_valid + def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" @@ -68,7 +70,7 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: @dataclass class CachedToken: - time: int + time: float value: str updating: bool = False @@ -81,12 +83,9 @@ def get_cached_access_token() -> str: return _cached_token.value # Indicate that the value is updating now, so the cached value can be # used. The first setting and close-to-ttl are not counted as update - if _cached_token.time != 0 or time.time() - 590 < _cached_token.time: - _cached_token.updating = True - else: - _cached_token.updating = False + _cached_token.updating = cached_value_is_valid(_cached_token.time, 590) private_key, app_id = get_key_and_app_from_aws() - _cached_token.time = int(time.time()) + _cached_token.time = time.time() _cached_token.value = get_access_token_by_key_app(private_key, app_id) _cached_token.updating = False return _cached_token.value diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 41ace95c350..27a67e2ae0e 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import logging import sys @@ -20,11 +19,15 @@ from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen +from clickhouse_helper import get_instance_type +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/performance-comparison" def get_run_command( + check_start_time, + check_name, workspace, result_path, repo_tests_path, @@ -33,12 +36,24 @@ def get_run_command( additional_env, image, ): + instance_type = get_instance_type() + + envs = [ + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e PR_TO_TEST={pr_to_test}", + f"-e SHA_TO_TEST={sha_to_test}", + ] + + env_str = " ".join(envs) + return ( f"docker run --privileged --volume={workspace}:/workspace " f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " - f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " + f"{env_str} {additional_env} " f"{image}" ) @@ -62,6 +77,9 @@ class RamDrive: if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) repo_tests_path = os.path.join(repo_path, "tests") @@ -71,7 +89,7 @@ if __name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] - required_build = CI_CONFIG["tests_config"][check_name]["required_build"] + required_build = CI_CONFIG.test_configs[check_name].required_build if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -121,15 +139,6 @@ if __name__ == "__main__": ) sys.exit(0) - test_grep_exclude_filter = CI_CONFIG["tests_config"][check_name][ - "test_grep_exclude_filter" - ] - if test_grep_exclude_filter: - docker_env += f" -e CHPC_TEST_GREP_EXCLUDE={test_grep_exclude_filter}" - logging.info( - "Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter - ) - rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") @@ -166,6 +175,8 @@ if __name__ == "__main__": docker_env += "".join([f" -e {name}" for name in env_extra]) run_command = get_run_command( + stopwatch.start_time_str, + check_name, result_path, result_path, repo_tests_path, @@ -177,6 +188,7 @@ if __name__ == "__main__": logging.info("Going to run command %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + compare_log_path = os.path.join(result_path, "compare.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -190,7 +202,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) paths = { - "compare.log": os.path.join(result_path, "compare.log"), + "compare.log": compare_log_path, "output.7z": os.path.join(result_path, "output.7z"), "report.html": os.path.join(result_path, "report.html"), "all-queries.html": os.path.join(result_path, "all-queries.html"), diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 86d4985c6b2..dee71b726df 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -279,7 +279,7 @@ class PRInfo: "user_orgs": self.user_orgs, } - def has_changes_in_documentation(self): + def has_changes_in_documentation(self) -> bool: # If the list wasn't built yet the best we can do is to # assume that there were changes. if self.changed_files is None or not self.changed_files: @@ -287,10 +287,9 @@ class PRInfo: for f in self.changed_files: _, ext = os.path.splitext(f) - path_in_docs = "docs" in f - path_in_website = "website" in f + path_in_docs = f.startswith("docs/") if ( - ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website) + ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs ) or "docker/docs" in f: return True return False diff --git a/tests/ci/report.py b/tests/ci/report.py index 8b301d08d56..30c99719b05 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes @dataclass class BuildResult: compiler: str - build_type: str + debug_build: bool sanitizer: str status: str elapsed_seconds: int @@ -484,8 +484,8 @@ def create_build_html_report( ): row = "" row += f"{build_result.compiler}" - if build_result.build_type: - row += f"{build_result.build_type}" + if build_result.debug_build: + row += "debug" else: row += "relwithdebuginfo" if build_result.sanitizer: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 330a1309016..db98a2c1ab5 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -108,7 +108,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - description_error, category = check_pr_description(pr_info.body) + description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] pr_labels_to_remove = [] if ( @@ -137,17 +137,20 @@ def main(): if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - if FEATURE_LABEL in pr_info.labels: - print(f"The '{FEATURE_LABEL}' in the labels, expect the 'Docs Check' status") + if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation(): + print( + f"The '{FEATURE_LABEL}' in the labels, " + "but there's no changed documentation" + ) post_commit_status( # do not pass pr_info here intentionally commit, - "pending", + "failure", NotSet, f"expect adding docs for {FEATURE_LABEL}", DOCS_NAME, + pr_info, ) - elif not description_error: - set_mergeable_check(commit, "skipped") + sys.exit(1) if description_error: print( @@ -173,6 +176,7 @@ def main(): ) sys.exit(1) + set_mergeable_check(commit, "skipped") ci_report_url = create_ci_report(pr_info, []) if not can_run: print("::notice ::Cannot run") diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 9ee0a431294..2bfe639739b 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -92,7 +92,7 @@ class S3Helper: file_path, ) else: - logging.info("No content type provied for %s", file_path) + logging.info("No content type provided for %s", file_path) else: if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search( r"\.log\..*(? str: if CI: diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py new file mode 100644 index 00000000000..be22a1c9312 --- /dev/null +++ b/tests/ci/sqltest.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import sys + +from github import Github + +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import ( + RerunHelper, + get_commit, + post_commit_status, +) +from docker_pull_helper import get_image_with_version +from env_helper import ( + GITHUB_RUN_URL, + REPORTS_PATH, + TEMP_PATH, +) +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResult +from s3_helper import S3Helper +from stopwatch import Stopwatch + +IMAGE_NAME = "clickhouse/sqltest" + + +def get_run_command(pr_number, sha, download_url, workspace_path, image): + return ( + f"docker run " + # For sysctl + "--privileged " + "--network=host " + f"--volume={workspace_path}:/workspace " + "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " + f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' + f"{image}" + ) + + +def main(): + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = TEMP_PATH + reports_path = REPORTS_PATH + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + pr_info = PRInfo() + + gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + + rerun_helper = RerunHelper(commit, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + docker_image = get_image_with_version(reports_path, IMAGE_NAME) + + build_name = get_build_name_for_check(check_name) + print(build_name) + urls = read_build_urls(build_name, reports_path) + if not urls: + raise Exception("No build URLs found") + + for url in urls: + if url.endswith("/clickhouse"): + build_url = url + break + else: + raise Exception("Cannot find the clickhouse binary among build results") + + logging.info("Got build url %s", build_url) + + workspace_path = os.path.join(temp_path, "workspace") + if not os.path.exists(workspace_path): + os.makedirs(workspace_path) + + run_command = get_run_command( + pr_info.number, pr_info.sha, build_url, workspace_path, docker_image + ) + logging.info("Going to run %s", run_command) + + run_log_path = os.path.join(temp_path, "run.log") + with open(run_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + run_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + check_name_lower = ( + check_name.lower().replace("(", "").replace(")", "").replace(" ", "") + ) + s3_prefix = f"{pr_info.number}/{pr_info.sha}/sqltest_{check_name_lower}/" + paths = { + "run.log": run_log_path, + "server.log.zst": os.path.join(workspace_path, "server.log.zst"), + "server.err.log.zst": os.path.join(workspace_path, "server.err.log.zst"), + "report.html": os.path.join(workspace_path, "report.html"), + "test.log": os.path.join(workspace_path, "test.log"), + } + + s3_helper = S3Helper() + for f in paths: + try: + paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) + except Exception as ex: + logging.info("Exception uploading file %s text %s", f, ex) + paths[f] = "" + + report_url = GITHUB_RUN_URL + if paths["report.html"]: + report_url = paths["report.html"] + + status = "success" + description = "See the report" + test_result = TestResult(description, "OK") + + ch_helper = ClickHouseHelper() + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [test_result], + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + check_name, + ) + + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) + print(f"::notice ::Report url: {report_url}") + post_commit_status(commit, status, report_url, description, check_name, pr_info) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 6d17384c63f..2c566144f2c 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -125,6 +125,8 @@ def prepare_for_hung_check(drop_databases): # However, it obstruct checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb + call_with_retry("kill -CONT $(lsof -ti:9000)") # ThreadFuzzer significantly slows down server and causes false-positive hung check failures call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") @@ -200,13 +202,14 @@ def prepare_for_hung_check(drop_databases): call( make_query_command( """ - select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 - from system.processes - where query not like '%from system.processes%' and elapsed < 300 + SELECT sleepEachRow(( + SELECT maxOrDefault(300 - elapsed) + 1 + FROM system.processes + WHERE query NOT LIKE '%FROM system.processes%' AND elapsed < 300 ) / 300) - from numbers(300) - format Null + FROM numbers(300) + FORMAT Null + SETTINGS function_sleep_max_microseconds_per_block = 0 """ ), shell=True, @@ -299,7 +302,7 @@ if __name__ == "__main__": have_long_running_queries = prepare_for_hung_check(args.drop_databases) except Exception as ex: have_long_running_queries = True - logging.error("Failed to prepare for hung check %s", str(ex)) + logging.error("Failed to prepare for hung check: %s", str(ex)) logging.info("Checking if some queries hung") cmd = " ".join( [ diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 895eb318bc4..21c3178faab 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -14,6 +14,7 @@ from build_download_helper import download_all_deb_packages from clickhouse_helper import ( ClickHouseHelper, prepare_tests_results_for_clickhouse, + get_instance_type, ) from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_pull_helper import get_image_with_version @@ -28,12 +29,34 @@ from upload_result_helper import upload_results def get_run_command( - build_path, result_folder, repo_tests_path, server_log_folder, image + pr_info, + check_start_time, + check_name, + build_path, + result_folder, + repo_tests_path, + server_log_folder, + image, ): + instance_type = get_instance_type() + + envs = [ + # a static link, don't use S3_URL or S3_DOWNLOAD + "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets'", + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e PULL_REQUEST_NUMBER='{pr_info.number}'", + f"-e COMMIT_SHA='{pr_info.sha}'", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + ] + + env_str = " ".join(envs) + cmd = ( "docker run --cap-add=SYS_PTRACE " - # a static link, don't use S3_URL or S3_DOWNLOAD - "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " + f"{env_str} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -149,9 +172,16 @@ def run_stress_test(docker_image_name): run_log_path = os.path.join(temp_path, "run.log") run_command = get_run_command( - packages_path, result_path, repo_tests_path, server_log_path, docker_image + pr_info, + stopwatch.start_time_str, + check_name, + packages_path, + result_path, + repo_tests_path, + server_log_path, + docker_image, ) - logging.info("Going to run func tests: %s", run_command) + logging.info("Going to run stress test: %s", run_command) with TeePopen(run_command, run_log_path, timeout=60 * 150) as process: retcode = process.wait() @@ -168,6 +198,23 @@ def run_stress_test(docker_image_name): ) ch_helper = ClickHouseHelper() + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + shell=True, + ) + report_url = upload_results( s3_helper, pr_info.number, diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index ed198d855b9..010f7dd6734 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -9,13 +9,13 @@ from typing import Any, Dict, List import boto3 # type: ignore -from lambda_shared import RunnerDescriptions, list_runners +from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @dataclass class CachedInstances: - time: int + time: float value: dict updating: bool = False @@ -27,17 +27,12 @@ def get_cached_instances() -> dict: """return cached instances description with updating it once per five minutes""" if time.time() - 250 < cached_instances.time or cached_instances.updating: return cached_instances.value - # Indicate that the value is updating now, so the cached value can be - # used. The first setting and close-to-ttl are not counted as update - if cached_instances.time != 0 or time.time() - 300 < cached_instances.time: - cached_instances.updating = True - else: - cached_instances.updating = False + cached_instances.updating = cached_value_is_valid(cached_instances.time, 300) ec2_client = boto3.client("ec2") instances_response = ec2_client.describe_instances( Filters=[{"Name": "instance-state-name", "Values": ["running"]}] ) - cached_instances.time = int(time.time()) + cached_instances.time = time.time() cached_instances.value = { instance["InstanceId"]: instance for reservation in instances_response["Reservations"] @@ -47,6 +42,28 @@ def get_cached_instances() -> dict: return cached_instances.value +@dataclass +class CachedRunners: + time: float + value: RunnerDescriptions + updating: bool = False + + +cached_runners = CachedRunners(0, []) + + +def get_cached_runners(access_token: str) -> RunnerDescriptions: + """From time to time request to GH api costs up to 3 seconds, and + it's a disaster from the termination lambda perspective""" + if time.time() - 5 < cached_runners.time or cached_instances.updating: + return cached_runners.value + cached_runners.updating = cached_value_is_valid(cached_runners.time, 15) + cached_runners.value = list_runners(access_token) + cached_runners.time = time.time() + cached_runners.updating = False + return cached_runners.value + + def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: data_array = event_data["CapacityToTerminate"] to_kill_by_zone = {} # type: Dict[str, int] @@ -104,7 +121,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: ) print("Time spent on the requests to AWS: ", time.time() - start) - runners = list_runners(access_token) + runners = get_cached_runners(access_token) runner_ids = set(runner.name for runner in runners) # We used to delete potential hosts to terminate from GitHub runners pool, # but the documentation states: diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images_for_tests.json similarity index 96% rename from tests/ci/tests/docker_images.json rename to tests/ci/tests/docker_images_for_tests.json index 0d40d43c33f..70db8760561 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images_for_tests.json @@ -119,7 +119,8 @@ "docker/test/stateless", "docker/test/integration/base", "docker/test/fuzzer", - "docker/test/keeper-jepsen" + "docker/test/keeper-jepsen", + "docker/test/sqltest" ] }, "docker/test/integration/kerberized_hadoop": { @@ -153,5 +154,9 @@ "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] } } diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 171dd743f90..d452c985407 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -12,7 +12,8 @@ echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -export RUNNER_URL="https://github.com/ClickHouse" +export RUNNER_ORG="ClickHouse" +export RUNNER_URL="https://github.com/${RUNNER_ORG}" # Funny fact, but metadata service has fixed IP INSTANCE_ID=$(ec2metadata --instance-id) export INSTANCE_ID @@ -102,7 +103,8 @@ check_proceed_spot_termination() { runner_pid=$(pgrep Runner.Listener) if [ -n "$runner_pid" ]; then # Kill the runner to not allow it cancelling the job - kill -9 "$runner_pid" + # shellcheck disable=SC2046 + kill -9 $(list_children "$runner_pid") fi sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" terminate_and_exit @@ -171,6 +173,7 @@ set -uo pipefail echo "Runner's public DNS: $(ec2metadata --public-hostname)" echo "Runner's labels: ${LABELS}" +echo "Runner's instance type: $(ec2metadata --instance-type)" EOF # Create a post-run script that will restart docker daemon before the job started @@ -234,6 +237,19 @@ is_job_assigned() { || return 1 } +list_children () { + local children + children=$(ps --ppid "$1" -o pid=) + if [ -z "$children" ]; then + return + fi + + for pid in $children; do + list_children "$pid" + done + echo "$children" +} + while true; do runner_pid=$(pgrep Runner.Listener) echo "Got runner pid '$runner_pid'" @@ -268,17 +284,11 @@ while true; do RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) )) echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" if (( 60 < RUNNER_AGE )); then - echo "Check if the instance should tear down" - if ! no_terminating_metadata; then - # Another check if the worker still didn't start - if is_job_assigned; then - echo "During the metadata check the job was assigned, continue" - continue - fi - kill -9 "$runner_pid" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_on_event - fi + echo "Attempt to delete the runner for a graceful shutdown" + sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ + || continue + echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" + terminate_and_exit fi fi sleep 5 diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 5e2331ece3c..e511d773577 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "MasterCI", "NightlyBuilds", + "PublishedReleaseCI", "PullRequestCI", "ReleaseBranchCI", } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c63e1e3ae52..8a7ca59011d 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -383,19 +383,19 @@ def get_stacktraces_from_clickhouse(args): ) replicated_msg = ( f"{args.client} {settings_str} --query " - '"SELECT materialize((hostName(), tcpPort())) as host, thread_id, ' + '"SELECT materialize((hostName(), tcpPort())) as host, thread_name, thread_id, query_id, trace, ' "arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " "FROM clusterAllReplicas('test_cluster_database_replicated', 'system.stack_trace') " 'ORDER BY host, thread_id FORMAT Vertical"' ) msg = ( f"{args.client} {settings_str} --query " - "\"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " + "\"SELECT thread_name, thread_id, query_id, trace, arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " 'FROM system.stack_trace FORMAT Vertical"' ) @@ -505,7 +505,6 @@ class FailureReason(enum.Enum): REPLICATED_DB = "replicated-database" S3_STORAGE = "s3-storage" BUILD = "not running for current build" - NO_UPGRADE_CHECK = "not running for upgrade check" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" # UNKNOWN reasons @@ -608,6 +607,9 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "min_count_to_compile_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_aggregate_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_sort_description": lambda: random.choice([0, 3]), "session_timezone": lambda: random.choice( [ # special non-deterministic around 1970 timezone, see [1]. @@ -946,14 +948,18 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - # TODO: remove checking "no-upgrade-check" after 23.1 - elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags - ): - return FailureReason.NO_UPGRADE_CHECK - elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif ( + tags + and ("no-s3-storage-with-slow-build" in tags) + and args.s3_storage + and ( + BuildFlags.THREAD in args.build_flags + or BuildFlags.DEBUG in args.build_flags + ) + ): + return FailureReason.S3_STORAGE elif tags: for build_flag in args.build_flags: @@ -1190,8 +1196,18 @@ class TestCase: # This is for .sh tests os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename + query_params = "" + if "need-query-parameters" in self.tags: + query_params = ( + " --param_CLICKHOUSE_DATABASE=" + + database + + " --param_CLICKHOUSE_DATABASE_1=" + + database + + "_1" + ) + params = { - "client": client + " --database=" + database, + "client": client + " --database=" + database + query_params, "logs_level": server_logs_level, "options": client_options, "test": self.case_file, @@ -1451,13 +1467,13 @@ class TestSuite: else: raise Exception(f"Unknown file_extension: {filename}") - def parse_tags_from_line(line, comment_sign): + def parse_tags_from_line(line, comment_sign) -> Set[str]: if not line.startswith(comment_sign): - return None + return set() tags_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 tags_prefix = "Tags:" if not tags_str.startswith(tags_prefix): - return None + return set() tags_str = tags_str[len(tags_prefix) :] # noqa: ignore E203 tags = tags_str.split(",") tags = {tag.strip() for tag in tags} @@ -1475,12 +1491,23 @@ class TestSuite: def load_tags_from_file(filepath): comment_sign = get_comment_sign(filepath) + need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - line = find_tag_line(file) + tag_line = find_tag_line(file) except UnicodeDecodeError: return [] - return parse_tags_from_line(line, comment_sign) + try: + if filepath.endswith(".sql"): + for line in file: + if "{CLICKHOUSE_DATABASE" in line: + need_query_params = True + except UnicodeDecodeError: + pass + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + return parsed_tags all_tags = {} start_time = datetime.now() @@ -2146,7 +2173,7 @@ def reportLogStats(args): print("\n") query = """ - SELECT message_format_string, count(), substr(any(message), 1, 120) AS any_message + SELECT message_format_string, count(), any(message) AS any_message FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) diff --git a/tests/config/config.d/clusters.xml b/tests/config/config.d/clusters.xml index 031d6e64bc9..cfd4868f1dc 100644 --- a/tests/config/config.d/clusters.xml +++ b/tests/config/config.d/clusters.xml @@ -176,6 +176,38 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + false + + 127.0.0.4 + 9000 + + + 127.0.0.5 + 9000 + + + 127.0.0.6 + 9000 + + + diff --git a/tests/config/config.d/filesystem_caches_path.xml b/tests/config/config.d/filesystem_caches_path.xml new file mode 100644 index 00000000000..ca946db2e0a --- /dev/null +++ b/tests/config/config.d/filesystem_caches_path.xml @@ -0,0 +1,3 @@ + + /var/lib/clickhouse/filesystem_caches/ + diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index 9685512a12a..dd93a317a77 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -6,14 +6,18 @@ http://localhost:11111/test/test/ clickhouse clickhouse - 1 - 22548578304 + + cache + 1Gi + cached_s3/ + s3 + -
s3
+
cached_s3
diff --git a/tests/config/config.d/text_log.xml b/tests/config/config.d/text_log.xml index dce4942d952..d3608c5439d 100644 --- a/tests/config/config.d/text_log.xml +++ b/tests/config/config.d/text_log.xml @@ -2,6 +2,6 @@ system text_log
- 7500 + 4000
diff --git a/tests/config/install.sh b/tests/config/install.sh index 77646cd6636..95ffbe2a3f9 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -43,7 +43,7 @@ ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/logger_test.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/logger_trace.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ @@ -60,6 +60,7 @@ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/confi ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/integration/README.md b/tests/integration/README.md index f0160dcd444..af973d2b9fa 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -52,6 +52,8 @@ sudo -H pip install \ (highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` +Some tests have other dependencies, e.g. spark. See docker/test/integration/runner/Dockerfile for how to install those. See docker/test/integration/runner/dockerd-entrypoint.sh for environment variables that need to be set (e.g. JAVA_PATH). + If you want to run the tests under a non-privileged user, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and re-login. (You must close all your sessions (for example, restart your computer)) To check, that you have access to Docker, run `docker ps`. @@ -90,7 +92,7 @@ plugins: repeat-0.9.1, xdist-2.5.0, forked-1.4.0, order-1.0.0, timeout-2.1.0 timeout: 900.0s timeout method: signal timeout func_only: False -collected 4 items +collected 4 items test_ssl_cert_authentication/test.py::test_https Copy common default production configuration from /clickhouse-config. Files: config.xml, users.xml PASSED diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index be4d019426a..aa89ccf11b3 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -313,6 +313,7 @@ class ClickhouseIntegrationTestsRunner: "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", ] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b5f7aababc9..6e76270c607 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -17,15 +17,15 @@ def tune_local_port_range(): # Lots of services uses non privileged ports: # - hdfs -- 50020/50070/... # - minio - # - mysql - # - psql - # - # So instead of tuning all these thirdparty services, let's simply - # prohibit using such ports for outgoing connections, this should fix - # possible "Address already in use" errors. # # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error. - run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + # NOTE: it is not inherited, so you may need to specify this in docker_compose_$SERVICE.yml + try: + run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + except Exception as ex: + logging.warning( + "Failed to run sysctl, tests may fail with EADDRINUSE %s", str(ex) + ) @pytest.fixture(autouse=True, scope="session") diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index fdeedb9a80d..c5f0584f828 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -182,7 +182,8 @@ class CommandRequest: # we suppress stderror on client becase sometimes thread sanitizer # can print some debug information there env = {} - env["TSAN_OPTIONS"] = "verbosity=0" + env["ASAN_OPTIONS"] = "use_sigaltstack=0" + env["TSAN_OPTIONS"] = "use_sigaltstack=0 verbosity=0" self.process = sp.Popen( command, stdin=stdin_file, diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0448eb2437f..eff44de842a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3199,7 +3199,6 @@ class ClickHouseInstance: ): self.name = name self.base_cmd = cluster.base_cmd - self.base_dir = base_path self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name @@ -4194,14 +4193,6 @@ class ClickHouseInstance: ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"] ) - def put_users_config(self, config_path): - """Put new config (useful if you cannot put it at the start)""" - - instance_config_dir = p.abspath(p.join(self.path, "configs")) - users_d_dir = p.abspath(p.join(instance_config_dir, "users.d")) - config_path = p.join(self.base_dir, config_path) - shutil.copy(config_path, users_d_dir) - def create_dir(self): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index daacd55887d..f40ed9ac6fa 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index 53eb023dba5..d5bdb92a79d 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml index 1db091c12bc..aa69b554660 100644 --- a/tests/integration/helpers/keeper_config3.xml +++ b/tests/integration/helpers/keeper_config3.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 6e1604f4eb5..d056225fee4 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", @@ -81,5 +82,15 @@ "test_system_flush_logs/test.py::test_log_buffer_size_rows_flush_threshold", "test_system_flush_logs/test.py::test_log_max_size", "test_crash_log/test.py::test_pkill_query_log", - "test_crash_log/test.py::test_pkill" + "test_crash_log/test.py::test_pkill", + + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_postgres", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_mysql", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http_named_session", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_grpc", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp_and_others", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_setting_in_query", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_client_suggestions_load" ] diff --git a/tests/integration/runner b/tests/integration/runner index 1b902803741..4c2b1054538 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -331,18 +331,8 @@ if __name__ == "__main__": if args.docker_compose_images_tags is not None: for img_tag in args.docker_compose_images_tags: [image, tag] = img_tag.split(":") - if image == "clickhouse/mysql-golang-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) - elif image == "clickhouse/dotnet-client": + if image == "clickhouse/dotnet-client": env_tags += "-e {}={} ".format("DOCKER_DOTNET_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-java-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-js-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-php-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) - elif image == "clickhouse/postgresql-java-client": - env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) elif image == "clickhouse/integration-helper": env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) elif image == "clickhouse/integration-test": @@ -351,6 +341,18 @@ if __name__ == "__main__": env_tags += "-e {}={} ".format("DOCKER_KERBERIZED_HADOOP_TAG", tag) elif image == "clickhouse/kerberos-kdc": env_tags += "-e {}={} ".format("DOCKER_KERBEROS_KDC_TAG", tag) + elif image == "clickhouse/mysql-golang-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-java-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-js-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-php-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) + elif image == "clickhouse/nginx-dav": + env_tags += "-e {}={} ".format("DOCKER_NGINX_DAV_TAG", tag) + elif image == "clickhouse/postgresql-java-client": + env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) else: logging.info("Unknown image %s" % (image)) diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index be4d71502d2..5069468110c 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -22,7 +22,7 @@ def test_access_rights_for_function(): instance.query("CREATE USER A") instance.query("CREATE USER B") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) @@ -32,7 +32,7 @@ def test_access_rights_for_function(): assert instance.query("SELECT MySum(1, 2)") == "3\n" assert ( - "it's necessary to have grant DROP FUNCTION ON *.*" + "it's necessary to have the grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user="B") ) @@ -44,7 +44,7 @@ def test_access_rights_for_function(): instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c19cca4126a..a6ba0c8918f 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -882,7 +882,7 @@ def test_required_privileges(): instance.query("CREATE USER u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON test.table" + expected_error = "necessary to have the grant BACKUP ON test.table" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE test.table TO {backup_name}", user="u1" ) @@ -890,12 +890,12 @@ def test_required_privileges(): instance.query("GRANT BACKUP ON test.table TO u1") instance.query(f"BACKUP TABLE test.table TO {backup_name}", user="u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table FROM {backup_name}", user="u1" ) - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table2" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}", user="u1" ) @@ -907,7 +907,7 @@ def test_required_privileges(): instance.query("DROP TABLE test.table") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u1" ) @@ -1014,14 +1014,14 @@ def test_system_users_required_privileges(): backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) instance.query("GRANT BACKUP ON system.users TO u2") - expected_error = "necessary to have grant BACKUP ON system.roles" + expected_error = "necessary to have the grant BACKUP ON system.roles" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) @@ -1035,7 +1035,7 @@ def test_system_users_required_privileges(): instance.query("DROP ROLE r1") expected_error = ( - "necessary to have grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" + "necessary to have the grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" ) assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" @@ -1043,7 +1043,7 @@ def test_system_users_required_privileges(): instance.query("GRANT CREATE USER, CREATE ROLE, ROLE ADMIN ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON test.* WITH GRANT OPTION" + expected_error = "necessary to have the grant SELECT ON test.* WITH GRANT OPTION" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 39496b8a5c8..a9ebbeb66c2 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -561,7 +561,7 @@ def test_required_privileges(): node1.query("GRANT CLUSTER ON *.* TO u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON default.tbl" + expected_error = "necessary to have the grant BACKUP ON default.tbl" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}", user="u1" ) @@ -571,7 +571,7 @@ def test_required_privileges(): node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl2" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) @@ -587,7 +587,7 @@ def test_required_privileges(): node1.query(f"DROP TABLE tbl2 ON CLUSTER 'cluster' SYNC") node1.query("REVOKE ALL FROM u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl" assert expected_error in node1.query_and_get_error( f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) @@ -607,7 +607,7 @@ def test_system_users(): node1.query("CREATE USER u2 SETTINGS allow_backup=false") node1.query("GRANT CLUSTER ON *.* TO u2") - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE system.users ON CLUSTER 'cluster' TO {backup_name}", user="u2" ) @@ -619,14 +619,16 @@ def test_system_users(): node1.query("DROP USER u1") - expected_error = "necessary to have grant CREATE USER ON *.*" + expected_error = "necessary to have the grant CREATE USER ON *.*" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) node1.query("GRANT CREATE USER ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON default.tbl WITH GRANT OPTION" + expected_error = ( + "necessary to have the grant SELECT ON default.tbl WITH GRANT OPTION" + ) assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 5c3f06a9d9d..c9f20333654 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -5,6 +5,7 @@ import time import concurrent from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV, assert_eq_with_retry +import re cluster = ClickHouseCluster(__file__) @@ -110,6 +111,73 @@ def create_and_fill_table(): nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") +def wait_for_fail_backup(node, backup_id, backup_name): + expected_errors = [ + "Concurrent backups not supported", + f"Backup {backup_name} already exists", + ] + status = node.query( + f"SELECT status FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + # It is possible that the second backup was picked up first, and then the async backup + if status == "BACKUP_FAILED": + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + assert re.search(f"Backup {backup_name} already exists", error) + return + else: + assert False, "Concurrent backups both passed, when one is expected to fail" + + +def wait_for_fail_restore(node, restore_id): + expected_errors = [ + "Concurrent restores not supported", + "Cannot restore the table default.tbl because it already contains some data", + ] + status = node.query( + f"SELECT status FROM system.backups WHERE id == '{restore_id}'" + ).rstrip("\n") + # It is possible that the second backup was picked up first, and then the async backup + if status == "RESTORE_FAILED": + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{restore_id}'" + ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + return + elif status == "RESTORING": + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) + error = node.query( + f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + ).rstrip("\n") + assert re.search( + "Cannot restore the table default.tbl because it already contains some data", + error, + ) + return + else: + assert False, "Concurrent restores both passed, when one is expected to fail" + + # All the tests have concurrent backup/restores with same backup names # The same works with different backup names too. Since concurrency # check comes before backup name check, separate tests are not added for different names @@ -137,33 +205,13 @@ def test_concurrent_backups_on_same_node(): f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - if not error: - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) - # It is possible that the second backup was picked up first, and then the async backup - if status == "BACKUP_FAILED": - return - elif status == "CREATING_BACKUP": - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_FAILED", - sleep_time=2, - retry_count=50, - ) - return - else: - raise Exception( - "Concurrent backups both passed, when one is expected to fail" - ) - expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + if not error: + wait_for_fail_backup(nodes[0], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -207,33 +255,14 @@ def test_concurrent_backups_on_different_nodes(): f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - if not error: - status = ( - nodes[1] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) - # It is possible that the second backup was picked up first, and then the async backup - if status == "BACKUP_FAILED": - return - elif status == "CREATING_BACKUP": - assert_eq_with_retry( - nodes[1], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_FAILED", - sleep_time=2, - retry_count=50, - ) - return - else: - raise Exception( - "Concurrent backups both passed, when one is expected to fail" - ) - expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + + if not error: + wait_for_fail_backup(nodes[1], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -276,33 +305,14 @@ def test_concurrent_restores_on_same_node(): f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - if not error: - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) - # It is possible that the second backup was picked up first, and then the async backup - if status == "RESTORE_FAILED": - return - elif status == "RESTORING": - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORE_FAILED", - sleep_time=2, - retry_count=50, - ) - return - else: - raise Exception( - "Concurrent restores both passed, when one is expected to fail" - ) - expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -345,33 +355,14 @@ def test_concurrent_restores_on_different_node(): f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - if not error: - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) - # It is possible that the second backup was picked up first, and then the async backup - if status == "RESTORE_FAILED": - return - elif status == "RESTORING": - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORE_FAILED", - sleep_time=2, - retry_count=50, - ) - return - else: - raise Exception( - "Concurrent restores both passed, when one is expected to fail" - ) - expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml index c1fd059bc67..d635e39e13f 100644 --- a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -21,6 +21,13 @@ minio123 33554432 + + cache + disk_s3 + /tmp/s3_cache/ + 1000000000 + 1 +
@@ -37,11 +44,19 @@ + + +
+ disk_s3_cache +
+
+
default disk_s3 disk_s3_plain + disk_s3_cache diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 8701bf0d832..f8ec39d240b 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -57,29 +57,42 @@ def get_events_for_query(query_id: str) -> Dict[str, int]: } +def format_settings(settings): + if not settings: + return "" + return "SETTINGS " + ",".join(f"{k}={v}" for k, v in settings.items()) + + def check_backup_and_restore( storage_policy, backup_destination, size=1000, - backup_name=None, + backup_settings=None, + restore_settings=None, + insert_settings=None, + optimize_table=True, ): + optimize_table_query = "OPTIMIZE TABLE data FINAL;" if optimize_table else "" + node.query( f""" DROP TABLE IF EXISTS data SYNC; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; - INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; - OPTIMIZE TABLE data FINAL; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size} {format_settings(insert_settings)}; + {optimize_table_query} """ ) + try: backup_query_id = uuid.uuid4().hex node.query( - f"BACKUP TABLE data TO {backup_destination}", query_id=backup_query_id + f"BACKUP TABLE data TO {backup_destination} {format_settings(backup_settings)}", + query_id=backup_query_id, ) restore_query_id = uuid.uuid4().hex node.query( f""" - RESTORE TABLE data AS data_restored FROM {backup_destination}; + RESTORE TABLE data AS data_restored FROM {backup_destination} {format_settings(restore_settings)}; """, query_id=restore_query_id, ) @@ -114,6 +127,7 @@ def check_system_tables(): expected_disks = ( ("default", "local"), ("disk_s3", "s3"), + ("disk_s3_cache", "s3"), ("disk_s3_other_bucket", "s3"), ("disk_s3_plain", "s3_plain"), ) @@ -184,7 +198,6 @@ def test_backup_to_s3_multipart(): storage_policy, backup_destination, size=1000000, - backup_name=backup_name, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" @@ -312,3 +325,77 @@ def test_incremental_backup_append_table_def(): assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n" assert "parts_to_throw_insert = 100" in node.query("SHOW CREATE TABLE data") + + +@pytest.mark.parametrize( + "in_cache_initially, allow_backup_read_cache, allow_s3_native_copy", + [ + (False, True, False), + (True, False, False), + (True, True, False), + (True, True, True), + ], +) +def test_backup_with_fs_cache( + in_cache_initially, allow_backup_read_cache, allow_s3_native_copy +): + storage_policy = "policy_s3_cache" + + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + + insert_settings = { + "enable_filesystem_cache_on_write_operations": int(in_cache_initially) + } + + backup_settings = { + "read_from_filesystem_cache": int(allow_backup_read_cache), + "allow_s3_native_copy": int(allow_s3_native_copy), + } + + restore_settings = {"allow_s3_native_copy": int(allow_s3_native_copy)} + + backup_events, restore_events = check_backup_and_restore( + storage_policy, + backup_destination, + size=10, + insert_settings=insert_settings, + optimize_table=False, + backup_settings=backup_settings, + restore_settings=restore_settings, + ) + + # print(f"backup_events = {backup_events}") + # print(f"restore_events = {restore_events}") + + # BACKUP never updates the filesystem cache but it may read it if `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` allows that. + if allow_backup_read_cache and in_cache_initially: + assert backup_events["CachedReadBufferReadFromCacheBytes"] > 0 + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + elif allow_backup_read_cache: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert backup_events["CachedReadBufferReadFromSourceBytes"] > 0 + else: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + + assert not "CachedReadBufferCacheWriteBytes" in backup_events + assert not "CachedWriteBufferCacheWriteBytes" in backup_events + + # RESTORE doesn't use the filesystem cache during write operations. + # However while attaching parts it may use the cache while reading such files as "columns.txt" or "checksums.txt" or "primary.idx", + # see IMergeTreeDataPart::loadColumnsChecksumsIndexes() + if "CachedReadBufferReadFromSourceBytes" in restore_events: + assert ( + restore_events["CachedReadBufferReadFromSourceBytes"] + == restore_events["CachedReadBufferCacheWriteBytes"] + ) + + assert not "CachedReadBufferReadFromCacheBytes" in restore_events + + # "format_version.txt" is written when a table is created, + # see MergeTreeData::initializeDirectoriesAndFormatVersion() + if "CachedWriteBufferCacheWriteBytes" in restore_events: + assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1 diff --git a/tests/integration/test_hive_query/__init__.py b/tests/integration/test_backup_restore_storage_policy/__init__.py similarity index 100% rename from tests/integration/test_hive_query/__init__.py rename to tests/integration/test_backup_restore_storage_policy/__init__.py diff --git a/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml new file mode 100644 index 00000000000..de9f68cbe87 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml @@ -0,0 +1,33 @@ + + + + + local + /var/lib/disks/one/ + + + local + /var/lib/disks/two/ + + + + + + + one + + + + + + + two + + + + + + + /backups + + diff --git a/tests/integration/test_backup_restore_storage_policy/test.py b/tests/integration/test_backup_restore_storage_policy/test.py new file mode 100644 index 00000000000..4e9d309a220 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/test.py @@ -0,0 +1,76 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +backup_id_counter = 0 + +cluster = ClickHouseCluster(__file__) + +instance = cluster.add_instance( + "instance", + main_configs=["configs/storage_config.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP DATABASE IF EXISTS test") + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"File('/backups/{backup_id_counter}/')" + + +def create_table_backup(backup_name, storage_policy=None): + instance.query("CREATE DATABASE test") + create_query = "CREATE TABLE test.table(x UInt32) ENGINE=MergeTree ORDER BY x" + if storage_policy is not None: + create_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(create_query) + instance.query(f"INSERT INTO test.table SELECT number FROM numbers(10)") + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + instance.query("DROP TABLE test.table SYNC") + + +def restore_table(backup_name, storage_policy=None): + restore_query = f"RESTORE TABLE test.table FROM {backup_name}" + if storage_policy is not None: + restore_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(restore_query) + + +@pytest.mark.parametrize( + "origin_policy, restore_policy, expected_policy", + [ + (None, "", "default"), + (None, None, "default"), + (None, "policy1", "policy1"), + ("policy1", "policy1", "policy1"), + ("policy1", "policy2", "policy2"), + ("policy1", "", "default"), + ("policy1", None, "policy1"), + ], +) +def test_storage_policies(origin_policy, restore_policy, expected_policy): + backup_name = new_backup_name() + create_table_backup(backup_name, origin_policy) + restore_table(backup_name, restore_policy) + + assert ( + instance.query("SELECT storage_policy FROM system.tables WHERE name='table'") + == f"{expected_policy}\n" + ) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py b/tests/integration/test_backup_s3_storage_class/__init__.py similarity index 100% rename from tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py rename to tests/integration/test_backup_s3_storage_class/__init__.py diff --git a/tests/integration/test_backup_s3_storage_class/test.py b/tests/integration/test_backup_s3_storage_class/test.py new file mode 100644 index 00000000000..2b11f20afc6 --- /dev/null +++ b/tests/integration/test_backup_s3_storage_class/test.py @@ -0,0 +1,47 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + stay_alive=True, + with_minio=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_backup_s3_storage_class(started_cluster): + node.query( + """ + CREATE TABLE test_s3_storage_class + ( + `id` UInt64, + `value` String + ) + ENGINE = MergeTree + ORDER BY id; + """, + ) + node.query( + """ + INSERT INTO test_s3_storage_class VALUES (1, 'a'); + """, + ) + result = node.query( + """ + BACKUP TABLE test_s3_storage_class TO S3('http://minio1:9001/root/data', 'minio', 'minio123') + SETTINGS s3_storage_class='STANDARD'; + """ + ) + + minio = cluster.minio_client + lst = list(minio.list_objects(cluster.minio_bucket, "data/.backup")) + assert lst[0].storage_class == "STANDARD" diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index 9a70ebe0d48..bdf993ddedf 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -57,27 +57,30 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): ] ) - node.exec_in_container(["mkdir", f"{path_to_detached}../unexpected_all_42_1337_5"]) - node.exec_in_container( - [ - "touch", - "-t", - "1312031429.30", - f"{path_to_detached}../unexpected_all_42_1337_5", - ] - ) - result = node.exec_in_container( - ["stat", f"{path_to_detached}../unexpected_all_42_1337_5"] - ) - print(result) - assert "Modify: 2013-12-03" in result - node.exec_in_container( - [ - "mv", - f"{path_to_detached}../unexpected_all_42_1337_5", - f"{path_to_detached}unexpected_all_42_1337_5", - ] - ) + for name in [ + "unexpected_all_42_1337_5", + "deleting_all_123_456_7", + "covered-by-broken_all_12_34_5", + ]: + node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"]) + node.exec_in_container( + [ + "touch", + "-t", + "1312031429.30", + f"{path_to_detached}../{name}", + ] + ) + result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"]) + print(result) + assert "Modify: 2013-12-03" in result + node.exec_in_container( + [ + "mv", + f"{path_to_detached}../{name}", + f"{path_to_detached}{name}", + ] + ) result = node.query( f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0} @@ -87,17 +90,20 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): node.query(f"DETACH TABLE {table}") node.query(f"ATTACH TABLE {table}") - result = node.exec_in_container(["ls", path_to_detached]) - print(result) - assert f"{expect_broken_prefix}_all_3_3_0" in result - assert "all_1_1_0" in result - assert "trash" in result - assert "broken_all_fake" in result - assert "unexpected_all_42_1337_5" in result - - time.sleep(15) - assert node.contains_in_log( - "Removed broken detached part unexpected_all_42_1337_5 due to a timeout" + node.wait_for_log_line( + "Removing detached part deleting_all_123_456_7", + timeout=90, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout", + timeout=10, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + "Removed broken detached part unexpected_all_42_1337_5 due to a timeout", + timeout=10, + look_behind_lines=1000000, ) result = node.exec_in_container(["ls", path_to_detached]) @@ -106,7 +112,16 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "all_1_1_0" in result assert "trash" in result assert "broken_all_fake" in result + assert "covered-by-broken_all_12_34_5" in result assert "unexpected_all_42_1337_5" not in result + assert "deleting_all_123_456_7" not in result + + node.query( + f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'", + settings={"allow_drop_detached": 1}, + ) + result = node.exec_in_container(["ls", path_to_detached]) + assert "covered-by-broken_all_12_34_5" not in result node.query(f"DROP TABLE {table} SYNC") diff --git a/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml b/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml index 8791c5f6ee8..d4f97fe2f4c 100644 --- a/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml +++ b/tests/integration/test_cluster_all_replicas/configs/remote_servers.xml @@ -1,6 +1,6 @@ - + node1 @@ -11,6 +11,6 @@ 9000 - + diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index 445eef64fcb..5ad96468950 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -4,12 +4,8 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -node2 = cluster.add_instance( - "node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) +node1 = cluster.add_instance("node1", main_configs=["configs/remote_servers.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/remote_servers.xml"]) @pytest.fixture(scope="module") @@ -24,11 +20,13 @@ def start_cluster(): def test_remote(start_cluster): assert ( node1.query( - """SELECT hostName() FROM clusterAllReplicas("two_shards", system.one)""" + """SELECT hostName() FROM clusterAllReplicas("one_shard_two_nodes", system.one)""" ) == "node1\nnode2\n" ) assert ( - node1.query("""SELECT hostName() FROM cluster("two_shards", system.one)""") + node1.query( + """SELECT hostName() FROM cluster("one_shard_two_nodes", system.one)""" + ) == "node1\n" ) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 2b6fcf6cac2..1bd3561f24f 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -595,24 +595,3 @@ def execute_task(started_cluster, task, cmd_options): task.check() finally: zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.skip(reason="Too flaky :(") -def test_different_schema(started_cluster): - execute_task(started_cluster, TaskWithDifferentSchema(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_columns(started_cluster): - execute_task(started_cluster, TaskTTL(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_skip_index(started_cluster): - execute_task(started_cluster, TaskSkipIndex(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_move_to_volume(started_cluster): - execute_task(started_cluster, TaskTTLMoveToVolume(started_cluster), []) diff --git a/tests/integration/test_compression_codec_read/test.py b/tests/integration/test_compression_codec_read/test.py index 38cd61e241d..b39e5147d38 100644 --- a/tests/integration/test_compression_codec_read/test.py +++ b/tests/integration/test_compression_codec_read/test.py @@ -11,6 +11,7 @@ node1 = cluster.add_instance( tag="20.8.11.17", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index 96264e53522..3a3981d65ba 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -103,10 +103,6 @@ def test_no_ttl_merges_in_busy_pool(started_cluster): rows_count.append(int(node1.query("SELECT count() FROM test_ttl").strip())) time.sleep(0.5) - # at least several seconds we didn't run any TTL merges and rows count equal - # to the original value - assert sum([1 for count in rows_count if count == 30]) > 4 - assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") node1.query("DROP TABLE test_ttl SYNC") diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index 1b7e7f38242..a5b82039a84 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -30,7 +30,7 @@ def send_signal(started_node, signal): def wait_for_clickhouse_stop(started_node): result = None - for attempt in range(60): + for attempt in range(120): time.sleep(1) pid = started_node.get_process_pid("clickhouse") if pid is None: diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index c7c30f5eea4..c8b75ea9751 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -24,6 +24,7 @@ node3 = cluster.add_instance( tag="20.3.16", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node4 = cluster.add_instance("node4") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py deleted file mode 100644 index 90287e19bd0..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import math -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/33006 - -pytestmark = pytest.mark.skip - -from .common import * - -from helpers.cluster import ClickHouseCluster -from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from helpers.external_sources import SourceCassandra - -SOURCE = None -cluster = None -node = None -simple_tester = None -complex_tester = None -ranged_tester = None -test_name = "cassandra" - - -def setup_module(module): - global cluster - global node - global simple_tester - global complex_tester - global ranged_tester - - cluster = ClickHouseCluster(__file__) - - SOURCE = SourceCassandra( - "Cassandra", - None, - cluster.cassandra_port, - cluster.cassandra_host, - cluster.cassandra_port, - "", - "", - ) - - simple_tester = SimpleLayoutTester(test_name) - simple_tester.cleanup() - simple_tester.create_dictionaries(SOURCE) - - complex_tester = ComplexLayoutTester(test_name) - complex_tester.create_dictionaries(SOURCE) - - ranged_tester = RangedLayoutTester(test_name) - ranged_tester.create_dictionaries(SOURCE) - # Since that all .xml configs were created - - main_configs = [] - main_configs.append(os.path.join("configs", "disable_ssl_verification.xml")) - - dictionaries = simple_tester.list_dictionaries() - - node = cluster.add_instance( - "cass_node", - main_configs=main_configs, - dictionaries=dictionaries, - with_cassandra=True, - ) - - -def teardown_module(module): - simple_tester.cleanup() - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - simple_tester.prepare(cluster) - complex_tester.prepare(cluster) - ranged_tester.prepare(cluster) - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple(started_cluster, layout_name): - simple_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) -def test_complex(started_cluster, layout_name): - complex_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED)) -def test_ranged(started_cluster, layout_name): - ranged_tester.execute(layout_name, node) diff --git a/tests/integration/test_dictionaries_complex_key_cache_string/test.py b/tests/integration/test_dictionaries_complex_key_cache_string/test.py index ae9cd4e7c91..bca7122a2a8 100644 --- a/tests/integration/test_dictionaries_complex_key_cache_string/test.py +++ b/tests/integration/test_dictionaries_complex_key_cache_string/test.py @@ -27,71 +27,3 @@ def started_cluster(): yield cluster finally: cluster.shutdown() - - -@pytest.mark.skip(reason="SSD cache test can run on disk only") -@pytest.mark.parametrize("type", ["memory", "ssd"]) -def test_memory_consumption(started_cluster, type): - node = started_cluster.instances[f"node_{type}"] - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "w" * 8 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "x" * 16 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "y" * 32 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "z" * 64 - ) - ) - - # Fill dictionary - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated_first = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - - alloc_array = [] - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) - - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py b/tests/integration/test_dictionaries_update_and_reload/test.py index a973b697d0d..99d08f1b6ea 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -281,7 +281,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster): query_and_get_error = instance.query_and_get_error # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. - expected_error = "Table test.xypairs doesn't exist" + expected_error = "Table test.xypairs does not exist" update_error = "Could not update cache dictionary cache_xypairs now" assert expected_error in query_and_get_error( "SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py index 85c45d5df3c..921fb4e4154 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_default_reading(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py index 92d681698bc..6eedf63f95c 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py @@ -53,7 +53,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_return_real_values(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py index 05f638ec337..8e45af44640 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py index 1da8fd3325a..8d6242f4711 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get_or_default(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py index 715219ceb87..eb394da8bb6 100644 --- a/tests/integration/test_dictionary_custom_settings/test.py +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -2,6 +2,8 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + DICTIONARY_FILES = [ "configs/dictionaries/FileSourceConfig.xml", @@ -78,5 +80,11 @@ def test_work(start_cluster): assert caught_exception.find("Limit for result exceeded") != -1 - assert query("SELECT dictGetString('test_http', 'first', toUInt64(1))") == "\\'a\n" - assert query("SELECT dictGetString('test_http', 'second', toUInt64(1))") == '"b\n' + # It is possible that the HTTP server takes long time to start accepting connections + + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n" + ) + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n' + ) diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py index 509b4de1a37..b620e88e7eb 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -76,7 +76,7 @@ def test_introspection(): "mydb", "filtered_table1", "6068883a-0e9d-f802-7e22-0144f8e66d3c", - "users.xml", + "users_xml", "1", 0, 0, @@ -89,7 +89,7 @@ def test_introspection(): "mydb", "filtered_table2", "c019e957-c60b-d54e-cc52-7c90dac5fb01", - "users.xml", + "users_xml", "1", 0, 0, @@ -102,7 +102,7 @@ def test_introspection(): "mydb", "filtered_table3", "4cb080d0-44e8-dbef-6026-346655143628", - "users.xml", + "users_xml", "1", 0, 0, @@ -115,7 +115,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -128,7 +128,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -141,7 +141,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -154,7 +154,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -167,7 +167,7 @@ def test_introspection(): "mydb", "local", "cdacaeb5-1d97-f99d-2bb0-4574f290629c", - "users.xml", + "users_xml", "1", 0, 0, diff --git a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py index 5d760c9fc2c..894464fd813 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py +++ b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py @@ -43,7 +43,7 @@ def test_system_db(): assert node.query("SELECT count() FROM system.tables WHERE name='table2'") == "1\n" assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="another" ) @@ -62,7 +62,7 @@ def test_system_db(): ) assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="sqluser" ) diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index d62adfb3343..bc60e5c865e 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -36,6 +36,7 @@ def cluster(): with_installed_binary=True, image="clickhouse/clickhouse-server", tag="22.8.14.53", + allow_analyzer=False, ) cluster.start() diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index cb51142d249..c48a7ad1fa1 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -11,6 +11,7 @@ node_old = cluster.add_instance( tag="20.8.9.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py index ad61a2ad6f5..1e566d5e2da 100644 --- a/tests/integration/test_distributed_insert_backward_compatibility/test.py +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -14,6 +14,7 @@ node_dist = cluster.add_instance( tag="21.11.9.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 6dd25789f36..1aeaddcf3c5 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -33,6 +33,7 @@ backward = make_instance( # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag="23.2.3", with_installed_binary=True, + allow_analyzer=False, ) users = pytest.mark.parametrize( @@ -109,10 +110,6 @@ def start_cluster(): cluster.shutdown() -def query_with_id(node, id_, query, **kwargs): - return node.query("WITH '{}' AS __id {}".format(id_, query), **kwargs) - - # @return -- [user, initial_user] def get_query_user_info(node, query_pattern): node.query("SYSTEM FLUSH LOGS") @@ -333,7 +330,7 @@ def test_secure_disagree_insert(): @users def test_user_insecure_cluster(user, password): id_ = "query-dist_insecure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_insecure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password) assert get_query_user_info(n1, id_) == [ user, user, @@ -344,7 +341,7 @@ def test_user_insecure_cluster(user, password): @users def test_user_secure_cluster(user, password): id_ = "query-dist_secure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_secure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(n2, id_) == [user, user] @@ -352,16 +349,14 @@ def test_user_secure_cluster(user, password): @users def test_per_user_inline_settings_insecure_cluster(user, password): id_ = "query-ddl-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_insecure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_insecure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -371,16 +366,14 @@ def test_per_user_inline_settings_insecure_cluster(user, password): @users def test_per_user_inline_settings_secure_cluster(user, password): id_ = "query-ddl-settings-dist_secure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_secure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_secure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -392,10 +385,8 @@ def test_per_user_inline_settings_secure_cluster(user, password): @users def test_per_user_protocol_settings_insecure_cluster(user, password): id_ = "query-protocol-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_insecure", + n1.query( + f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password, settings={ @@ -410,10 +401,8 @@ def test_per_user_protocol_settings_insecure_cluster(user, password): @users def test_per_user_protocol_settings_secure_cluster(user, password): id_ = "query-protocol-settings-dist_secure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_secure", + n1.query( + f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password, settings={ @@ -430,8 +419,8 @@ def test_per_user_protocol_settings_secure_cluster(user, password): @users def test_user_secure_cluster_with_backward(user, password): id_ = "with-backward-query-dist_secure-" + user - query_with_id( - n1, id_, "SELECT * FROM dist_secure_backward", user=user, password=password + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password ) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] @@ -440,13 +429,7 @@ def test_user_secure_cluster_with_backward(user, password): @users def test_user_secure_cluster_from_backward(user, password): id_ = "from-backward-query-dist_secure-" + user - query_with_id( - backward, - id_, - "SELECT * FROM dist_secure_backward", - user=user, - password=password, - ) + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 9f595800bea..61d52a1d9b1 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -125,7 +125,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): SELECT count() FROM {exclusive_table}; """ ) - assert f"Table default.{exclusive_table} doesn't exist" in result + assert f"Table default.{exclusive_table} does not exist" in result else: assert 0 == int( node.query( diff --git a/tests/integration/test_drop_replica/test.py b/tests/integration/test_drop_replica/test.py index 0941e664982..e0928c6ab08 100644 --- a/tests/integration/test_drop_replica/test.py +++ b/tests/integration/test_drop_replica/test.py @@ -159,11 +159,11 @@ def test_drop_replica(start_cluster): for i in range(1, 5): node_1_1.query("DETACH DATABASE test{}".format(i)) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table" ) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM DATABASE test1" ) diff --git a/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml index d35cb173dd0..99efd99ab31 100644 --- a/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml +++ b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml @@ -322,4 +322,76 @@ input_parameter.py {test_parameter:UInt64} + + executable + test_function_always_error_throw_python + String + + UInt64 + + TabSeparated + input_always_error.py + throw + + + + executable + test_function_always_error_log_python + String + + UInt64 + + TabSeparated + input_always_error.py + log + + + + executable + test_function_always_error_log_first_python + String + + UInt64 + + TabSeparated + input_log_error.py + log_first + + + + executable + test_function_always_error_log_last_python + String + + UInt64 + + TabSeparated + input_log_error.py + log_last + + + + executable + test_function_exit_error_ignore_python + String + + UInt64 + + TabSeparated + input_exit_error.py + 0 + + + + executable + test_function_exit_error_fail_python + String + + UInt64 + + TabSeparated + input_exit_error.py + + +
diff --git a/tests/integration/test_executable_user_defined_function/test.py b/tests/integration/test_executable_user_defined_function/test.py index 1f4e14470c3..ccc06a7b43a 100644 --- a/tests/integration/test_executable_user_defined_function/test.py +++ b/tests/integration/test_executable_user_defined_function/test.py @@ -1,6 +1,7 @@ import os import sys import time +import uuid import pytest @@ -285,3 +286,51 @@ def test_executable_function_parameter_python(started_cluster): node.query("SELECT test_function_parameter_python(2)(toUInt64(1))") == "Parameter 2 key 1\n" ) + + +def test_executable_function_always_error_python(started_cluster): + skip_test_msan(node) + try: + node.query("SELECT test_function_always_error_throw_python(1)") + assert False, "Exception have to be thrown" + except Exception as ex: + assert "DB::Exception: Executable generates stderr: Fake error" in str(ex) + + query_id = uuid.uuid4().hex + assert ( + node.query("SELECT test_function_always_error_log_python(1)", query_id=query_id) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr: Fake error" + ) + + query_id = uuid.uuid4().hex + assert ( + node.query( + "SELECT test_function_always_error_log_first_python(1)", query_id=query_id + ) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr at the beginning: {'a' * (3 * 1024)}{'b' * 1024}\n" + ) + + query_id = uuid.uuid4().hex + assert ( + node.query( + "SELECT test_function_always_error_log_last_python(1)", query_id=query_id + ) + == "Key 1\n" + ) + assert node.contains_in_log( + f"{{{query_id}}} TimeoutReadBufferFromFileDescriptor: Executable generates stderr at the end: {'b' * 1024}{'c' * (3 * 1024)}\n" + ) + + assert node.query("SELECT test_function_exit_error_ignore_python(1)") == "Key 1\n" + + try: + node.query("SELECT test_function_exit_error_fail_python(1)") + assert False, "Exception have to be thrown" + except Exception as ex: + assert "DB::Exception: Child process was exited with return code 1" in str(ex) diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py new file mode 100755 index 00000000000..f50c1235e3c --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_always_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + print("Fake error", file=sys.stderr) + sys.stderr.flush() + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py new file mode 100755 index 00000000000..036d69a8297 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_exit_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() + + sys.exit(1) diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py new file mode 100755 index 00000000000..d622f44f75d --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_log_error.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + print(f"{'a' * (3 * 1024)}{'b' * (3 * 1024)}{'c' * (3 * 1024)}", file=sys.stderr) + sys.stderr.flush() + for line in sys.stdin: + print("Key " + line, end="") + sys.stdout.flush() diff --git a/tests/integration/test_extreme_deduplication/test.py b/tests/integration/test_extreme_deduplication/test.py index 03451933171..3632369154a 100644 --- a/tests/integration/test_extreme_deduplication/test.py +++ b/tests/integration/test_extreme_deduplication/test.py @@ -71,73 +71,3 @@ def test_deduplication_window_in_seconds(started_cluster): assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3" node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") - - -# Currently this test just reproduce incorrect behavior that sould be fixed -@pytest.mark.skip(reason="Flapping test") -def test_deduplication_works_in_case_of_intensive_inserts(started_cluster): - inserters = [] - fetchers = [] - - node1.query( - """ - CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id""" - ) - - node1.query("INSERT INTO simple VALUES (0, 0)") - - for node in nodes: - host = node.ip_address - - inserters.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - {} --host {} -q "INSERT INTO simple VALUES (0, 0)" -done -""".format( - cluster.get_client_cmd(), host - ), - ) - ) - - fetchers.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - res=`{} --host {} -q "SELECT count() FROM simple"` - if [[ $? -ne 0 || $res -ne 1 ]]; then - echo "Selected $res elements! Host: {}" 1>&2 - exit -1 - fi; -done -""".format( - cluster.get_client_cmd(), host, node.name - ), - ) - ) - - # There were not errors during INSERTs - for inserter in inserters: - try: - inserter.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - # There were not errors during SELECTs - for fetcher in fetchers: - try: - fetcher.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index ee5d4b5df93..35f4d81b66a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -188,7 +188,7 @@ def test_grant_all_on_table(): instance.query("SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") @@ -449,7 +449,7 @@ def test_introspection(): ] ) - expected_error = "necessary to have grant SHOW USERS" + expected_error = "necessary to have the grant SHOW USERS" assert expected_error in instance.query_and_get_error("SHOW GRANTS FOR B", user="A") expected_access1 = ( @@ -471,7 +471,7 @@ def test_introspection(): [ [ "A", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -484,7 +484,7 @@ def test_introspection(): ], [ "B", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -556,7 +556,7 @@ def test_grant_with_replace_option(): ) expected_error = ( - "it's necessary to have grant INSERT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant INSERT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON test.table TO B WITH REPLACE OPTION", user="A" @@ -568,7 +568,7 @@ def test_grant_with_replace_option(): instance.query("GRANT INSERT ON test.table TO A WITH GRANT OPTION") expected_error = ( - "it's necessary to have grant SELECT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant SELECT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON test.table TO B WITH REPLACE OPTION", user="A" diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index c4364a03fd9..7c19888df6b 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -509,6 +509,6 @@ CREATE TABLE test.graphite_not_created ) # The order of retentions is not guaranteed - assert "age and precision should only grow up: " in str(exc.value) + assert "Age and precision should only grow up: " in str(exc.value) assert "36000:600" in str(exc.value) assert "72000:300" in str(exc.value) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test.py b/tests/integration/test_groupBitmapAnd_on_distributed/test.py index 4dbc81236e7..8cf7e0fb2c1 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -19,6 +28,7 @@ node4 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.5", with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py index 774b7708587..115e6009801 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -20,6 +29,7 @@ node4 = cluster.add_instance( tag="21.6", with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_storage_nats/__init__.py b/tests/integration/test_group_array_element_size/__init__.py similarity index 100% rename from tests/integration/test_storage_nats/__init__.py rename to tests/integration/test_group_array_element_size/__init__.py diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml new file mode 100644 index 00000000000..7a9cda7ccbd --- /dev/null +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -0,0 +1,3 @@ + + 10 + diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py new file mode 100644 index 00000000000..86b1d5feeee --- /dev/null +++ b/tests/integration/test_group_array_element_size/test.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=["configs/group_array_max_element_size.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_max_exement_size(started_cluster): + node1.query( + "CREATE TABLE tab3 (x AggregateFunction(groupArray, Array(UInt8))) ENGINE = MergeTree ORDER BY tuple()" + ) + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(10)") + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "10\n" + + # First query should always fail + with pytest.raises(Exception, match=r"Too large array size"): + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(11)") + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "10", + "11", + ) + + node1.restart_clickhouse() + + node1.query("insert into tab3 select groupArrayState([zero]) from zeros(11)") + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "21\n" + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "11", + "10", + ) + + node1.restart_clickhouse() + + with pytest.raises(Exception, match=r"Too large array size"): + node1.query("select length(groupArrayMerge(x)) from tab3") + + node1.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "10", + "11", + ) + + node1.restart_clickhouse() + + assert node1.query("select length(groupArrayMerge(x)) from tab3") == "21\n" diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index dead4d447ec..efc7d98e820 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -352,9 +352,13 @@ def test_authentication(): def test_logs(): - logs = query_and_get_logs("SELECT 1", settings={"send_logs_level": "debug"}) - assert "SELECT 1" in logs - assert "Read 1 rows" in logs + query = "SELECT has(groupArray(number), 42) FROM numbers(1000000) SETTINGS max_block_size=100000" + logs = query_and_get_logs( + query, + settings={"send_logs_level": "debug"}, + ) + assert query in logs + assert "Read 1000000 rows" in logs assert "Peak memory usage" in logs diff --git a/tests/integration/test_hive_query/configs/config.xml b/tests/integration/test_hive_query/configs/config.xml deleted file mode 100644 index 7de1391e56c..00000000000 --- a/tests/integration/test_hive_query/configs/config.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - localhost - 9000 - - - - - - - true - /tmp/clickhouse_local_cache,/tmp/clickhouse_local_cache1 - 207374182400 - 1048576 - - - - /etc/clickhouse-server/extra_conf.d/hdfs-site.xml - - - - system - metric_log
- 1000 - 1000 -
- -
diff --git a/tests/integration/test_hive_query/configs/hdfs-site.xml b/tests/integration/test_hive_query/configs/hdfs-site.xml deleted file mode 100644 index 82c525ea414..00000000000 --- a/tests/integration/test_hive_query/configs/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - dfs.replication - 1 - - diff --git a/tests/integration/test_hive_query/data/prepare_hive_data.sh b/tests/integration/test_hive_query/data/prepare_hive_data.sh deleted file mode 100755 index 495ea201870..00000000000 --- a/tests/integration/test_hive_query/data/prepare_hive_data.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -hive -e "create database test" - -hive -e "drop table if exists test.demo; create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " -hive -e "drop table if exists test.parquet_demo; create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" -hive -e "drop table if exists test.demo_text; create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" - -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" - -hive -e "drop table if exists test.test_hive_types; CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" - -hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" - diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py deleted file mode 100644 index 791ae03f9f6..00000000000 --- a/tests/integration/test_hive_query/test.py +++ /dev/null @@ -1,498 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/43541 - -pytestmark = pytest.mark.skip - -import logging -import os - -import time -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -logging.getLogger().setLevel(logging.INFO) -logging.getLogger().addHandler(logging.StreamHandler()) - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "h0_0_0", - main_configs=["configs/config.xml"], - extra_configs=["configs/hdfs-site.xml", "data/prepare_hive_data.sh"], - with_hive=True, - ) - - logging.info("Starting cluster ...") - cluster.start() - cluster.copy_file_to_container( - "roottesthivequery_hdfs1_1", - "/ClickHouse/tests/integration/test_hive_query/data/prepare_hive_data.sh", - "/prepare_hive_data.sh", - ) - cluster.exec_in_container( - "roottesthivequery_hdfs1_1", ["bash", "-c", "bash /prepare_hive_data.sh"] - ) - yield cluster - finally: - cluster.shutdown() - - -def test_create_parquet_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet; -CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_parquet_table_1(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet_parts; -CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_orc_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - - assert test_passed - - -def test_create_text_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_text; - CREATE TABLE default.demo_text (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_text') PARTITION BY (tuple()) - """ - ) - logging.info("create result {}".format(result)) - - assert result.strip() == "" - - -def test_parquet_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_in_filter(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') - """ - ) - expected_result = """2 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_orc_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_orc_file_minmax_index,enable_orc_stripe_minmax_index", - [ - pytest.param( - "demo_orc_no_cache_no_index", - "false", - "false", - "false", - id="demo_orc_no_cache_no_index", - ), - pytest.param( - "demo_orc_with_cache_no_index", - "true", - "false", - "false", - id="demo_orc_with_cache_no_index", - ), - pytest.param( - "demo_orc_no_cache_file_index", - "false", - "true", - "false", - id="demo_orc_no_cache_file_index", - ), - pytest.param( - "demo_orc_with_cache_file_index", - "true", - "true", - "false", - id="demo_orc_with_cache_file_index", - ), - pytest.param( - "demo_orc_no_cache_stripe_index", - "false", - "true", - "true", - id="demo_orc_no_cache_stripe_index", - ), - pytest.param( - "demo_orc_with_cache_stripe_index", - "true", - "true", - "true", - id="demo_orc_with_cache_stripe_index", - ), - ], -) -def test_orc_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) - SETTINGS enable_orc_file_minmax_index = {enable_orc_file_minmax_index}, enable_orc_stripe_minmax_index = {enable_orc_stripe_minmax_index}; - """.format( - table=table, - enable_orc_file_minmax_index=enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index=enable_orc_stripe_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_parquet_rowgroup_minmax_index", - [ - pytest.param( - "demo_parquet_no_cache_no_index", - "false", - "false", - id="demo_parquet_no_cache_no_index", - ), - pytest.param( - "demo_parquet_with_cache_no_index", - "true", - "false", - id="demo_parquet_with_cache_no_index", - ), - pytest.param( - "demo_parquet_no_cache_rowgroup_index", - "false", - "true", - id="demo_parquet_no_cache_rowgroup_index", - ), - pytest.param( - "demo_parquet_with_cache_rowgroup_index", - "true", - "true", - id="demo_parquet_with_cache_rowgroup_index", - ), - ], -) -def test_parquet_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_parquet_rowgroup_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - SETTINGS enable_parquet_rowgroup_minmax_index = {enable_parquet_rowgroup_minmax_index} - """.format( - table=table, - enable_parquet_rowgroup_minmax_index=enable_parquet_rowgroup_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -def test_hive_columns_prunning(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' - """ - ) - expected_result = """4 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_text_count(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day SETTINGS format_csv_delimiter = '\x01' - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_groupby_with_cache(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_groupby_by_hive_function(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_cache_read_bytes(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - test_passed = False - for i in range(10): - result = node.query( - """ - SELECT * FROM default.demo_parquet_1 settings input_format_parquet_allow_missing_columns = true - """ - ) - node.query("system flush logs") - result = node.query( - "select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0" - ) - if result.strip() == "0": - logging.info("ProfileEvent_ExternalDataSourceLocalCacheReadBytes == 0") - time.sleep(10) - continue - test_passed = True - break - assert test_passed - - -def test_cache_dir_use(started_cluster): - node = started_cluster.instances["h0_0_0"] - result0 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] - ) - result1 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] - ) - assert result0 != "0" and result1 != "0" - - -def test_hive_struct_type(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32, d Tuple(x Int32, y String)), `day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day) - """ - ) - result = node.query( - """ - SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 - """ - ) - expected_result = """1 2 3 4 5 6.11 7.22 8 2022-02-20 14:47:04 2022-02-20 hello world hello world hello world true [1,2,3] ['hello world','hello world'] [1.1,1.2] {'a':100,'b':200,'c':300} {'a':'aa','b':'bb','c':'cc'} {'a':111.1,'b':222.2,'c':333.3} ('aaa',200,333.3,(10,'xyz')) 2022-02-20""" - assert result.strip() == expected_result - - result = node.query( - """ - SELECT day, f_struct.a, f_struct.d.x FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 - """ - ) - expected_result = """2022-02-20 aaa 10""" - - -def test_table_alter_add(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 ADD COLUMN id Nullable(String) FIRST - """ - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - - expected_result = "id\tNullable(String)\t\t\t\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)" - assert result.strip() == expected_result - - -def test_table_alter_drop(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 DROP COLUMN id - """ - ) - - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """score\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result - - -def test_table_alter_comment(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - - result = node.query( - """ALTER TABLE default.demo_parquet_1 COMMENT COLUMN id 'Text comment'""" - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """id\tNullable(String)\t\t\tText comment\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml index 7a2141e6c7e..6ca61fb3c44 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com index 9beb415c290..6c6e4cbee2e 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com @@ -1 +1 @@ -filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml index 7a2141e6c7e..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_insert_into_distributed/test.py b/tests/integration/test_insert_into_distributed/test.py index 3bee2149387..09a14c11727 100644 --- a/tests/integration/test_insert_into_distributed/test.py +++ b/tests/integration/test_insert_into_distributed/test.py @@ -246,7 +246,7 @@ def test_inserts_local(started_cluster): def test_inserts_single_replica_local_internal_replication(started_cluster): with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, match="Table default.single_replicated does not exist" ): node1.query( "INSERT INTO distributed_one_replica_internal_replication VALUES ('2000-01-01', 1)", @@ -279,7 +279,8 @@ def test_inserts_single_replica_internal_replication(started_cluster): def test_inserts_single_replica_no_internal_replication(started_cluster): try: with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, + match="Table default.single_replicated does not exist", ): node1.query( "INSERT INTO distributed_one_replica_no_internal_replication VALUES ('2000-01-01', 1)", diff --git a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py index a5f92002450..b1eb0df2d43 100644 --- a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py +++ b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py @@ -108,74 +108,6 @@ def test_reconnect(started_cluster): assert remote.query("SELECT count(*) FROM local1").strip() == "3" -@pytest.mark.skip(reason="Flapping test") -def test_inserts_batching(started_cluster): - instance = instance_test_inserts_batching - - with PartitionManager() as pm: - pm.partition_instances(instance, remote) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 1)") - # Sleep a bit so that this INSERT forms a batch of its own. - time.sleep(0.2) - - instance.query("INSERT INTO local2_source(x, d) VALUES (2, '2000-01-01')") - - for i in range(3, 7): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - for i in range(7, 9): - instance.query( - "INSERT INTO local2_source(x, d) VALUES ({}, '2000-01-01')".format(i) - ) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 9)") - - # After ALTER the structure of the saved blocks will be different - instance.query("DROP TABLE local2_view") - instance.query("ALTER TABLE distributed ADD COLUMN s String") - - # Memory Engine doesn't support ALTER so we just DROP/CREATE everything - instance.query("DROP TABLE local2_source") - instance.query( - "CREATE TABLE local2_source (d Date, x UInt32, s String) ENGINE = Memory" - ) - instance.query( - "CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d,x,s FROM local2_source" - ) - - for i in range(10, 13): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - time.sleep(1.0) - - result = remote.query( - "SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part" - ) - - # Explanation: as merges are turned off on remote instance, active parts in local2 table correspond 1-to-1 - # to inserted blocks. - # Batches of max 3 rows are formed as min_insert_block_size_rows = 3. - # Blocks: - # 1. Failed batch that is retried with the same contents. - # 2. Full batch of inserts regardless of the order of columns thanks to the view. - # 3. Full batch of inserts regardless order of columns thanks to the view. - # 4. Full batch of inserts after ALTER (that have different block structure). - # 5. What was left to insert before ALTER. - expected = """\ -20000101_20000101_1_1_0 [1] -20000101_20000101_2_2_0 [2,3,4] -20000101_20000101_3_3_0 [5,6,7] -20000101_20000101_4_4_0 [10,11,12] -20000101_20000101_5_5_0 [8,9] -""" - assert TSV(result) == TSV(expected) - - def test_inserts_local(started_cluster): instance = instance_test_inserts_local_cluster instance.query("INSERT INTO local_source VALUES ('2000-01-01', 1)") diff --git a/tests/integration/test_kafka_bad_messages/test.py b/tests/integration/test_kafka_bad_messages/test.py index da3cf36c853..a634ce36631 100644 --- a/tests/integration/test_kafka_bad_messages/test.py +++ b/tests/integration/test_kafka_bad_messages/test.py @@ -90,7 +90,9 @@ def producer_serializer(x): return x.encode() if isinstance(x, str) else x -def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): +def kafka_produce( + kafka_cluster, topic, messages, timestamp=None, retries=15, partition=None +): logging.debug( "kafka_produce server:{}:{} topic:{}".format( "localhost", kafka_cluster.kafka_port, topic @@ -100,7 +102,9 @@ def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): kafka_cluster.kafka_port, producer_serializer, retries ) for message in messages: - producer.send(topic=topic, value=message, timestamp_ms=timestamp) + producer.send( + topic=topic, value=message, timestamp_ms=timestamp, partition=partition + ) producer.flush() @@ -115,7 +119,7 @@ def kafka_cluster(): cluster.shutdown() -def test_bad_messages_parsing(kafka_cluster): +def test_bad_messages_parsing_stream(kafka_cluster): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -244,7 +248,7 @@ struct Message f""" DROP TABLE IF EXISTS view; DROP TABLE IF EXISTS kafka; - + CREATE TABLE kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -253,9 +257,9 @@ struct Message kafka_format = 'CapnProto', kafka_handle_error_mode='stream', kafka_schema='schema_test_errors:Message'; - + CREATE MATERIALIZED VIEW view Engine=Log AS - SELECT _error FROM kafka WHERE length(_error) != 0 ; + SELECT _error FROM kafka WHERE length(_error) != 0; """ ) @@ -279,6 +283,68 @@ struct Message kafka_delete_topic(admin_client, "CapnProto_err") +def test_bad_messages_parsing_exception(kafka_cluster, max_retries=20): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + for format_name in [ + "Avro", + "JSONEachRow", + ]: + print(format_name) + + kafka_create_topic(admin_client, f"{format_name}_err") + + instance.query( + f""" + DROP TABLE IF EXISTS view_{format_name}; + DROP TABLE IF EXISTS kafka_{format_name}; + DROP TABLE IF EXISTS kafka; + + CREATE TABLE kafka_{format_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}_err', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_num_consumers = 1; + + CREATE MATERIALIZED VIEW view_{format_name} Engine=Log AS + SELECT * FROM kafka_{format_name}; + """ + ) + + kafka_produce( + kafka_cluster, f"{format_name}_err", ["qwertyuiop", "asdfghjkl", "zxcvbnm"] + ) + + expected_result = """avro::Exception: Invalid data file. Magic does not match: : while parsing Kafka message (topic: Avro_err, partition: 0, offset: 0)\\'|1|1|1|default|kafka_Avro +Cannot parse input: expected \\'{\\' before: \\'qwertyuiop\\': while parsing Kafka message (topic: JSONEachRow_err, partition: 0, offset: 0)\\'|1|1|1|default|kafka_JSONEachRow +""" + retries = 0 + result_system_kafka_consumers = "" + while True: + result_system_kafka_consumers = instance.query( + """ + SELECT exceptions.text[1], length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers ORDER BY table, assignments.partition_id[1] + """ + ) + result_system_kafka_consumers = result_system_kafka_consumers.replace("\t", "|") + if result_system_kafka_consumers == expected_result or retries > max_retries: + break + retries += 1 + time.sleep(1) + + assert result_system_kafka_consumers == expected_result + + for format_name in [ + "Avro", + "JSONEachRow", + ]: + kafka_delete_topic(admin_client, f"{format_name}_err") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 00c7908eeed..8f7056a5afd 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -1,6 +1,7 @@ import pytest from helpers.client import CommandRequest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) @@ -13,7 +14,7 @@ node = cluster.add_instance( ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def started_cluster(): try: cluster.start() @@ -23,41 +24,123 @@ def started_cluster(): cluster.shutdown() -def test_base_commands(started_cluster): - _ = started_cluster - - command = CommandRequest( +def keeper_query(query: str): + return CommandRequest( [ - started_cluster.server_bin_path, + cluster.server_bin_path, "keeper-client", "--host", str(cluster.get_instance_ip("zoo1")), "--port", str(cluster.zookeeper_port), "-q", - "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;", + query, ], stdin="", ) + +def test_big_family(): + command = keeper_query( + "touch test_big_family;" + "touch test_big_family/1;" + "touch test_big_family/1/1;" + "touch test_big_family/1/2;" + "touch test_big_family/1/3;" + "touch test_big_family/1/4;" + "touch test_big_family/1/5;" + "touch test_big_family/2;" + "touch test_big_family/2/1;" + "touch test_big_family/2/2;" + "touch test_big_family/2/3;" + "find_big_family test_big_family;" + ) + + assert command.get_answer() == TSV( + [ + ["/test_big_family/1", "5"], + ["/test_big_family/2", "3"], + ["/test_big_family/2/3", "0"], + ["/test_big_family/2/2", "0"], + ["/test_big_family/2/1", "0"], + ["/test_big_family/1/5", "0"], + ["/test_big_family/1/4", "0"], + ["/test_big_family/1/3", "0"], + ["/test_big_family/1/2", "0"], + ["/test_big_family/1/1", "0"], + ] + ) + + command = keeper_query("find_big_family test_big_family 1;") + + assert command.get_answer() == TSV( + [ + ["/test_big_family/1", "5"], + ] + ) + + +def test_find_super_nodes(): + command = keeper_query( + "touch test_find_super_nodes;" + "touch test_find_super_nodes/1;" + "touch test_find_super_nodes/1/1;" + "touch test_find_super_nodes/1/2;" + "touch test_find_super_nodes/1/3;" + "touch test_find_super_nodes/1/4;" + "touch test_find_super_nodes/1/5;" + "touch test_find_super_nodes/2;" + "touch test_find_super_nodes/2/1;" + "touch test_find_super_nodes/2/2;" + "touch test_find_super_nodes/2/3;" + "touch test_find_super_nodes/2/4;" + "cd test_find_super_nodes;" + "find_super_nodes 4;" + ) + + assert command.get_answer() == TSV( + [ + ["/test_find_super_nodes/1", "5"], + ["/test_find_super_nodes/2", "4"], + ] + ) + + +def test_delete_stale_backups(): + command = keeper_query( + "touch /clickhouse;" + "touch /clickhouse/backups;" + "touch /clickhouse/backups/1;" + "touch /clickhouse/backups/1/stage;" + "touch /clickhouse/backups/1/stage/alive123;" + "touch /clickhouse/backups/2;" + "touch /clickhouse/backups/2/stage;" + "touch /clickhouse/backups/2/stage/dead123;" + "delete_stale_backups;" + "y;" + "ls clickhouse/backups;" + ) + + assert command.get_answer() == ( + "You are going to delete all inactive backups in /clickhouse/backups. Continue?\n" + 'Found backup "/clickhouse/backups/1", checking if it\'s active\n' + 'Backup "/clickhouse/backups/1" is active, not going to delete\n' + 'Found backup "/clickhouse/backups/2", checking if it\'s active\n' + 'Backup "/clickhouse/backups/2" is not active, deleting it\n' + "1\n" + ) + + +def test_base_commands(): + command = keeper_query( + "create test_create_zk_node1 testvalue1;" + "create test_create_zk_node_2 testvalue2;" + "get test_create_zk_node1;" + ) + assert command.get_answer() == "testvalue1\n" -def test_four_letter_word_commands(started_cluster): - _ = started_cluster - - command = CommandRequest( - [ - started_cluster.server_bin_path, - "keeper-client", - "--host", - str(cluster.get_instance_ip("zoo1")), - "--port", - str(cluster.zookeeper_port), - "-q", - "ruok", - ], - stdin="", - ) - +def test_four_letter_word_commands(): + command = keeper_query("ruok") assert command.get_answer() == "imok\n" diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py index e23d0674c12..b099d0513e1 100644 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -41,80 +41,3 @@ def started_cluster(): def get_fake_zk(node): return ku.get_fake_zk(cluster, node) - - -def test_reconfig_replace_leader_in_one_command(started_cluster): - """ - Remove leader from a cluster of 3 and add a new node to this cluster in a single command - """ - - zk1 = get_fake_zk(node1) - config = ku.get_config_str(zk1) - - assert len(config.split("\n")) == 3 - assert "node1" in config - assert "node2" in config - assert "node3" in config - assert "node4" not in config - - for i in range(100): - zk1.create(f"/test_four_{i}", b"somedata") - - zk2 = get_fake_zk(node2) - zk2.sync("/test_four_0") - ku.wait_configs_equal(config, zk2) - - zk3 = get_fake_zk(node3) - zk3.sync("/test_four_0") - ku.wait_configs_equal(config, zk3) - - for i in range(100): - assert zk2.exists(f"/test_four_{i}") is not None - assert zk3.exists(f"/test_four_{i}") is not None - - assert ku.is_leader(cluster, node1) - node4.start_clickhouse() - config, _ = zk2.reconfig( - joining="server.4=node4:9234", leaving="1", new_members=None - ) - config = config.decode("utf-8") - - print("After removing 1 and adding 4", config) - assert len(config.split("\n")) == 3 - assert "node1" not in config - assert "node2" in config - assert "node3" in config - assert "node4" in config - - ku.wait_until_connected(cluster, node4) - time.sleep(1) - - zk4 = get_fake_zk(node4) - zk4.sync("/test_four_0") - ku.wait_configs_equal(config, zk4) - - for i in range(100): - assert zk4.exists(f"test_four_{i}") is not None - zk4.create(f"/test_four_{100 + i}", b"somedata") - - with pytest.raises(Exception): - zk1.stop() - zk1.close() - zk1 = get_fake_zk(node1) - zk1.sync("/test_four_0") - - zk2.stop() - zk2.close() - zk2 = get_fake_zk(node2) - zk2.sync("/test_four_0") - ku.wait_configs_equal(config, zk2) - - zk3.stop() - zk3.close() - zk3 = get_fake_zk(node3) - zk3.sync("/test_four_0") - ku.wait_configs_equal(config, zk3) - - for i in range(200): - assert zk2.exists(f"test_four_{i}") is not None - assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 389d430622d..18cfdfa5070 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1249,7 +1249,7 @@ def err_sync_user_privs_with_materialized_mysql_database( ) assert "priv_err_db" in clickhouse_node.query("SHOW DATABASES") assert "test_table_1" not in clickhouse_node.query("SHOW TABLES FROM priv_err_db") - clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db") + clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db SYNC") mysql_node.query("REVOKE SELECT ON priv_err_db.* FROM 'test'@'%'") time.sleep(3) @@ -1442,7 +1442,7 @@ def mysql_kill_sync_thread_restore_test(clickhouse_node, mysql_node, service_nam time.sleep(sleep_time) clickhouse_node.query("SELECT * FROM test_database.test_table") - clickhouse_node.query_with_retry("DETACH DATABASE test_database") + clickhouse_node.query_with_retry("DETACH DATABASE test_database SYNC") clickhouse_node.query("ATTACH DATABASE test_database") check_query( clickhouse_node, @@ -1506,7 +1506,7 @@ def mysql_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.alloc_connection() - clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert") + clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert SYNC") clickhouse_node.query("ATTACH DATABASE kill_mysql_while_insert") result = mysql_node.query_and_get_data( @@ -1671,22 +1671,24 @@ def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}`(id, `{table}`) VALUES(2, now())") mysql_node.query( f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted(id, {table}) VALUES(2, now())") clickhouse_node.query( f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}`", + "2\n", ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "2\n", ) # Inc sync @@ -1694,20 +1696,24 @@ def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}2`(id, `{table}`) VALUES(2, now())") check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "2\n", ) mysql_node.query( f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" ) mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + mysql_node.query( + f"INSERT INTO {db}.{table}2_unquoted(id, {table}) VALUES(2, now())" + ) check_query( clickhouse_node, - f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", - "1\n", + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "2\n", ) clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") @@ -2586,6 +2592,20 @@ def named_collections(clickhouse_node, mysql_node, service_name): f"/* expect: (1, 'a', 1), (2, 'b', 2) */ SELECT * FROM {db}.t1", "1\ta\t1\n2\tb\t2\n", ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=9999") + clickhouse_node.query_with_retry(f"DETACH DATABASE {db} SYNC") + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (3, 'c', 3)") + assert "ConnectionFailed:" in clickhouse_node.query_and_get_error( + f"ATTACH DATABASE {db}" + ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=3306") + clickhouse_node.query(f"ATTACH DATABASE {db}") + check_query( + clickhouse_node, + f"/* expect: (1, 'a', 1), (2, 'b', 2), (3, 'c', 3) */ SELECT * FROM {db}.t1", + "1\ta\t1\n2\tb\t2\n3\tc\t3\n", + ) + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") @@ -2617,3 +2637,37 @@ def create_table_as_select(clickhouse_node, mysql_node, service_name): clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def table_with_indexes(clickhouse_node, mysql_node, service_name): + db = "table_with_indexes" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + mysql_node.query( + f"CREATE TABLE {db}.t1(id INT NOT NULL PRIMARY KEY," + f"data varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL) ENGINE = InnoDB" + ) + + mysql_node.query(f"INSERT INTO {db}.t1 VALUES(1, 'some test string 1')") + mysql_node.query(f"INSERT INTO {db}.t1 VALUES(2, 'some test string 2')") + + clickhouse_node.query( + f""" + CREATE DATABASE {db} ENGINE = MaterializeMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse') + TABLE OVERRIDE t1 (COLUMNS ( + INDEX data_idx data TYPE ngrambf_v1(5, 65536, 4, 0) GRANULARITY 1 + )) + """ + ) + + check_query( + clickhouse_node, + "SELECT data_uncompressed_bytes FROM system.data_skipping_indices WHERE " + "database = 'table_with_indexes' and table = 't1' and name = 'data_idx'", + "65536\n", + ) + + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 1fd09f733f0..64c4fb45e9b 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -1,17 +1,11 @@ -import os -import os.path as p import time -import pwd -import re import pymysql.cursors import pytest from helpers.cluster import ( ClickHouseCluster, ClickHouseInstance, get_docker_compose_path, - run_and_check, ) -import docker import logging from . import materialized_with_ddl @@ -63,8 +57,6 @@ class MySQLConnection: user="root", password="clickhouse", ip_address=None, - docker_compose=None, - project_name=cluster.project_name, ): self.user = user self.port = port @@ -554,3 +546,9 @@ def test_create_table_as_select(started_cluster, started_mysql_8_0, clickhouse_n materialized_with_ddl.create_table_as_select( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_table_with_indexes(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.table_with_indexes( + clickhouse_node, started_mysql_8_0, "mysql80" + ) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 86b70f8db70..121b9b294f0 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -7,6 +7,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread from azure.storage.blob import BlobServiceClient +from test_storage_azure_blob_storage.test import azure_query SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -38,27 +39,10 @@ def cluster(): cluster.shutdown() -# Note: use this for selects and inserts and create table queries. +# Note: use azure_query for selects and inserts and create table queries. # For inserts there is no guarantee that retries will not result in duplicates. -# But it is better to retry anyway because 'Connection was closed by the server' error +# But it is better to retry anyway because connection related errors # happens in fact only for inserts because reads already have build-in retries in code. -def azure_query(node, query, try_num=3, settings={}): - for i in range(try_num): - try: - return node.query(query, settings=settings) - except Exception as ex: - retriable_errors = [ - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" - ] - retry = False - for error in retriable_errors: - if error in str(ex): - retry = True - logging.info(f"Try num: {i}. Having retriable error: {ex}") - break - if not retry or i == try_num - 1: - raise Exception(ex) - continue def create_table(node, table_name, **additional_settings): diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index d7c267eed50..507f25209a4 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -855,6 +855,11 @@ def test_s3_engine_heavy_write_check_mem( memory = in_flight_memory[1] node = cluster.instances[node_name] + + # it's bad idea to test something related to memory with sanitizers + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + node.query("DROP TABLE IF EXISTS s3_test SYNC") node.query( "CREATE TABLE s3_test" @@ -887,7 +892,7 @@ def test_s3_engine_heavy_write_check_mem( assert int(memory_usage) < 1.2 * memory assert int(memory_usage) > 0.8 * memory - assert int(wait_inflight) > 10 * 1000 * 1000 + assert int(wait_inflight) > in_flight * 1000 * 1000 check_no_objects_after_drop(cluster, node_name=node_name) diff --git a/tests/integration/test_storage_postgresql_replica/__init__.py b/tests/integration/test_multi_access_storage_role_management/__init__.py similarity index 100% rename from tests/integration/test_storage_postgresql_replica/__init__.py rename to tests/integration/test_multi_access_storage_role_management/__init__.py diff --git a/tests/integration/test_multi_access_storage_role_management/configs/memory.xml b/tests/integration/test_multi_access_storage_role_management/configs/memory.xml new file mode 100644 index 00000000000..c2f60c8bbc1 --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/configs/memory.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_multi_access_storage_role_management/configs/roles.xml b/tests/integration/test_multi_access_storage_role_management/configs/roles.xml new file mode 100644 index 00000000000..faaf1ee27ae --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/configs/roles.xml @@ -0,0 +1,9 @@ + + + + + GRANT ALL ON *.* WITH GRANT OPTION + + + + diff --git a/tests/integration/test_multi_access_storage_role_management/test.py b/tests/integration/test_multi_access_storage_role_management/test.py new file mode 100644 index 00000000000..77aea411394 --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/test.py @@ -0,0 +1,153 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", stay_alive=True, main_configs=["configs/memory.xml"] +) + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + node.query("CREATE DATABASE mydb") + node.query("CREATE TABLE mydb.table1(x UInt32) ENGINE=Log") + + node.query("CREATE USER test_user") + node.query("CREATE USER test_user2") + yield cluster + + finally: + cluster.shutdown() + + +def execute_test_for_access_type(access_type: str, system_table_name: str): + node.query(f"CREATE {access_type} test1 IN local_directory") + node.query(f"CREATE {access_type} test2 IN local_directory") + node.query(f"CREATE {access_type} test3 IN local_directory") + + node.query(f"CREATE {access_type} test4 IN memory") + node.query(f"CREATE {access_type} test5 IN memory") + node.query(f"CREATE {access_type} test6 IN memory") + + # Already exists + with pytest.raises(QueryRuntimeException): + node.query(f"CREATE {access_type} test1 IN memory") + + node.query(f"MOVE {access_type} test1 TO memory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test1'" + ) == TSV(["memory"]) + + node.query(f"MOVE {access_type} test2 TO local_directory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test2'" + ) == TSV(["local_directory"]) + + node.query(f"MOVE {access_type} test2,test3 TO memory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test2'" + ) == TSV(["memory"]) + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test3'" + ) == TSV(["memory"]) + + node.query(f"MOVE {access_type} test4,test5 TO local_directory") + + # Different storages + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test4,test1 TO memory") + + # Doesn't exist + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test7 TO local_directory") + + # Storage doesn't exist + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test6 TO non_existing_storage") + + # Unwriteable storage + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test6 TO users_xml") + + node.query(f"DROP {access_type} test1") + node.query(f"DROP {access_type} test2") + node.query(f"DROP {access_type} test3") + node.query(f"DROP {access_type} test4") + node.query(f"DROP {access_type} test5") + node.query(f"DROP {access_type} test6") + + +def test_roles(): + execute_test_for_access_type("ROLE", "roles") + + +def test_users(): + execute_test_for_access_type("USER", "users") + + +def test_settings_profiles(): + execute_test_for_access_type("SETTINGS PROFILE", "settings_profiles") + + +def test_quotas(): + execute_test_for_access_type("QUOTA", "quotas") + + +def test_role_from_different_storages(): + node.query("CREATE ROLE default_role") + node.query("GRANT SELECT ON system.* TO default_role") + + assert node.query("SHOW GRANTS FOR default_role") == TSV( + ["GRANT SELECT ON system.* TO default_role"] + ) + assert node.query("SHOW ROLES") == TSV(["default_role"]) + + node.query("GRANT default_role TO test_user") + + node.copy_file_to_container( + os.path.join(SCRIPT_DIR, "configs/roles.xml"), + "/etc/clickhouse-server/users.d/roles.xml", + ) + + node.restart_clickhouse() + + assert node.query("SELECT name, storage FROM system.roles") == TSV( + [["default_role", "users_xml"], ["default_role", "local_directory"]] + ) + + # Role from users.xml will have priority + assert node.query("SHOW GRANTS FOR default_role") == TSV( + ["GRANT ALL ON *.* TO default_role WITH GRANT OPTION"] + ) + + node.query("GRANT default_role TO test_user") + node.query("GRANT default_role TO test_user2") + assert node.query( + "SELECT granted_role_id FROM system.role_grants WHERE user_name = 'test_user2'" + ) == TSV( + [ + "62bedbf3-7fb1-94cb-3a35-e479693223b3" + ] # roles from users.xml have deterministic ids + ) + + node.query("DROP ROLE default_role FROM local_directory") + assert node.query( + "SELECT granted_role_id FROM system.role_grants WHERE user_name = 'test_user'" + ) == TSV(["62bedbf3-7fb1-94cb-3a35-e479693223b3"]) + + # Already exists + with pytest.raises(QueryRuntimeException): + node.query("CREATE ROLE default_role IN memory") + + node.query("CREATE ROLE other_role IN memory") + + assert node.query( + "SELECT storage FROM system.roles WHERE name = 'other_role'" + ) == TSV(["memory"]) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 17621d09422..30669feb6b3 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1166,152 +1166,6 @@ def produce_alter_move(node, name): pass -@pytest.mark.parametrize( - "name,engine", - [ - pytest.param("concurrently_altering_mt", "MergeTree()", id="mt"), - pytest.param( - "concurrently_altering_replicated_mt", - "ReplicatedMergeTree('/clickhouse/concurrently_altering_replicated_mt', '1')", - id="replicated", - ), - ], -) -def test_concurrent_alter_move(start_cluster, name, engine): - try: - node1.query_with_retry( - """ - CREATE TABLE IF NOT EXISTS {name} ( - EventDate Date, - number UInt64 - ) ENGINE = {engine} - ORDER BY tuple() - PARTITION BY toYYYYMM(EventDate) - SETTINGS storage_policy='jbods_with_external' - """.format( - name=name, engine=engine - ) - ) - - values = list({random.randint(1, 1000000) for _ in range(0, 1000)}) - - def insert(num): - for i in range(num): - day = random.randint(11, 30) - value = values.pop() - month = "0" + str(random.choice([3, 4])) - node1.query_with_retry( - "INSERT INTO {} VALUES(toDate('2019-{m}-{d}'), {v})".format( - name, m=month, d=day, v=value - ) - ) - - def alter_move(num): - for i in range(num): - produce_alter_move(node1, name) - - def alter_update(num): - for i in range(num): - node1.query( - "ALTER TABLE {} UPDATE number = number + 1 WHERE 1".format(name) - ) - - def optimize_table(num): - for i in range(num): - node1.query_with_retry("OPTIMIZE TABLE {} FINAL".format(name)) - - p = Pool(15) - tasks = [] - for i in range(5): - tasks.append(p.apply_async(insert, (100,))) - tasks.append(p.apply_async(alter_move, (100,))) - tasks.append(p.apply_async(alter_update, (100,))) - tasks.append(p.apply_async(optimize_table, (100,))) - - for task in tasks: - task.get(timeout=240) - - assert node1.query("SELECT 1") == "1\n" - assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n" - finally: - node1.query(f"DROP TABLE IF EXISTS {name} SYNC") - - -@pytest.mark.parametrize( - "name,engine", - [ - pytest.param("concurrently_dropping_mt", "MergeTree()", id="mt"), - pytest.param( - "concurrently_dropping_replicated_mt", - "ReplicatedMergeTree('/clickhouse/concurrently_dropping_replicated_mt', '1')", - id="replicated", - ), - ], -) -def test_concurrent_alter_move_and_drop(start_cluster, name, engine): - try: - node1.query( - """ - CREATE TABLE IF NOT EXISTS {name} ( - EventDate Date, - number UInt64 - ) ENGINE = {engine} - ORDER BY tuple() - PARTITION BY toYYYYMM(EventDate) - SETTINGS storage_policy='jbods_with_external' - """.format( - name=name, engine=engine - ) - ) - - values = list({random.randint(1, 1000000) for _ in range(0, 1000)}) - - def insert(num): - for i in range(num): - day = random.randint(11, 30) - value = values.pop() - month = "0" + str(random.choice([3, 4])) - node1.query_with_retry( - "INSERT INTO {} VALUES(toDate('2019-{m}-{d}'), {v})".format( - name, m=month, d=day, v=value - ) - ) - - def alter_move(num): - for i in range(num): - produce_alter_move(node1, name) - - def alter_drop(num): - for i in range(num): - partition = random.choice([201903, 201904]) - op = random.choice(["drop", "detach"]) - try: - node1.query( - "ALTER TABLE {} {} PARTITION {}".format(name, op, partition) - ) - except QueryRuntimeException as e: - if "Code: 650" in e.stderr: - pass - else: - raise e - - insert(20) - p = Pool(15) - tasks = [] - for i in range(5): - tasks.append(p.apply_async(insert, (20,))) - tasks.append(p.apply_async(alter_move, (20,))) - tasks.append(p.apply_async(alter_drop, (20,))) - - for task in tasks: - task.get(timeout=120) - - assert node1.query("SELECT 1") == "1\n" - - finally: - node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") - - @pytest.mark.parametrize( "name,engine", [ diff --git a/tests/integration/test_mutations_with_merge_tree/test.py b/tests/integration/test_mutations_with_merge_tree/test.py index e7d7abc8c7c..0b4a750b38e 100644 --- a/tests/integration/test_mutations_with_merge_tree/test.py +++ b/tests/integration/test_mutations_with_merge_tree/test.py @@ -27,111 +27,6 @@ def started_cluster(): cluster.shutdown() -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def test_mutations_in_partition_background(started_cluster): - try: - numbers = 100 - - name = "test_mutations_in_partition" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i} WHERE a = {i}""" - ) - - for i in range(1, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} UPDATE b = 'changed' IN PARTITION {i} WHERE a = {i} """ - ) - - def count_and_changed(): - return instance_test_mutations.query( - f"SELECT count(), countIf(b == 'changed') FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - all_done = False - for wait_times_for_mutation in range( - 100 - ): # wait for replication 80 seconds max - time.sleep(0.8) - - if count_and_changed() == ["66,33"]: - all_done = True - break - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert (count_and_changed(), all_done) == (["66,33"], True) - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == ["67,67"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -@pytest.mark.parametrize("sync", [("last",), ("all",)]) -def test_mutations_in_partition_sync(started_cluster, sync): - try: - numbers = 10 - - name = "test_mutations_in_partition_sync" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i} WHERE a = {i}""" - + (" SETTINGS mutations_sync = 1" if sync == "all" else "") - ) - - for reverse_index, i in reversed( - list(enumerate(reversed(range(1, numbers, 3)))) - ): - instance_test_mutations.query( - f"""ALTER TABLE {name} UPDATE b = 'changed' IN PARTITION {i} WHERE a = {i}""" - + ( - " SETTINGS mutations_sync = 1" - if not reverse_index or sync == "all" - else "" - ) - ) - - def count_and_changed(): - return instance_test_mutations.query( - f"SELECT count(), countIf(b == 'changed') FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count_and_changed() == ["6,3"] - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == ["7,7"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - def test_mutations_with_merge_background_task(started_cluster): instance_test_mutations.query( """SYSTEM STOP MERGES test_mutations_with_ast_elements""" @@ -190,92 +85,3 @@ def test_mutations_with_truncate_table(started_cluster): ).rstrip() == "0" ) - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def test_mutations_will_not_hang_for_non_existing_parts_sync(started_cluster): - try: - numbers = 100 - - name = "test_mutations_will_not_hang_for_non_existing_parts_sync" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i+1000} WHERE a = {i} SETTINGS mutations_sync = 1""" - ) - - def count(): - return instance_test_mutations.query( - f"SELECT count() FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count() == [f"{numbers}"] - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == [f"34,34"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def test_mutations_will_not_hang_for_non_existing_parts_async(started_cluster): - try: - numbers = 100 - - name = "test_mutations_will_not_hang_for_non_existing_parts_async" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i+1000} WHERE a = {i}""" - ) - - def count(): - return instance_test_mutations.query( - f"SELECT count() FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - def count_and_sum_is_done(): - return instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - all_done = False - for wait_times_for_mutation in range( - 100 - ): # wait for replication 80 seconds max - time.sleep(0.8) - - if count_and_sum_is_done() == ["34,34"]: - all_done = True - break - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count() == [f"{numbers}"] - assert count_and_sum_is_done() == ["34,34"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 18dde5307fd..00b5eb9e8aa 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -1021,3 +1021,24 @@ def test_memory_leak(started_cluster): clickhouse_node.query("DROP DATABASE test_database") clickhouse_node.restart_clickhouse() + + +def test_password_leak(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + ) + assert "clickhouse" not in clickhouse_node.query( + "SHOW CREATE test_database.test_table" + ) diff --git a/tests/integration/test_mysql_protocol/golang.reference b/tests/integration/test_mysql_protocol/golang.reference index 4069b2a086a..7680c8c74dc 100644 --- a/tests/integration/test_mysql_protocol/golang.reference +++ b/tests/integration/test_mysql_protocol/golang.reference @@ -14,6 +14,7 @@ a UNSIGNED TINYINT Result: tables 1 tables 1 +tables 1 Columns: a b diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 5e03c4883b4..a5c82e1a2c6 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -207,7 +207,7 @@ def test_mysql_client(started_cluster): expected_msg = "\n".join( [ "mysql: [Warning] Using a password on the command line interface can be insecure.", - "ERROR 81 (00000) at line 1: Code: 81. DB::Exception: Database system2 doesn't exist", + "ERROR 81 (00000) at line 1: Code: 81. DB::Exception: Database system2 does not exist", ] ) assert stderr[: len(expected_msg)].decode() == expected_msg @@ -371,6 +371,22 @@ def test_mysql_replacement_query(started_cluster): "database()\ndefault\n", ] + # SELECT SCHEMA(). + code, (stdout, stderr) = started_cluster.mysql_client_container.exec_run( + """ + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "select schema();" + """.format( + host=started_cluster.get_instance_ip("node"), port=server_port + ), + demux=True, + ) + assert code == 0 + assert stdout.decode().lower() in [ + "currentdatabase()\ndefault\n", + "schema()\ndefault\n", + ] + code, (stdout, stderr) = started_cluster.mysql_client_container.exec_run( """ mysql --protocol tcp -h {host} -P {port} default -u default @@ -621,7 +637,7 @@ def test_python_client(started_cluster): client.select_db("system2") assert exc_info.value.args[1].startswith( - "Code: 81. DB::Exception: Database system2 doesn't exist" + "Code: 81. DB::Exception: Database system2 does not exist" ), exc_info.value.args[1] cursor = client.cursor(pymysql.cursors.DictCursor) @@ -646,7 +662,7 @@ def test_golang_client(started_cluster, golang_container): ) assert code == 1 - assert stderr.decode() == "Error 81: Database abc doesn't exist\n" + assert stderr.decode() == "Error 81: Database abc does not exist\n" code, (stdout, stderr) = golang_container.exec_run( "./main --host {host} --port {port} --user default --password 123 --database " @@ -679,7 +695,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} default 123".format( @@ -688,7 +704,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test.php {host} {port} user_with_double_sha1 abacaba".format( @@ -697,7 +713,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} user_with_double_sha1 abacaba".format( @@ -706,7 +722,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" def test_mysqljs_client(started_cluster, nodejs_container): diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index 5f5657fad54..cbb8c94c701 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -344,7 +344,7 @@ def test_granular_access_create_alter_drop_query(cluster): ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant CREATE NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant CREATE NAMED COLLECTION" in node.query_and_get_error( "CREATE NAMED COLLECTION collection2 AS key1=1, key2='value2'", user="kek" ) @@ -370,7 +370,7 @@ def test_granular_access_create_alter_drop_query(cluster): ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant ALTER NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant ALTER NAMED COLLECTION" in node.query_and_get_error( "ALTER NAMED COLLECTION collection2 SET key1=2", user="kek" ) @@ -385,14 +385,14 @@ def test_granular_access_create_alter_drop_query(cluster): ) node.query("REVOKE alter named collection ON collection2 FROM kek") assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant ALTER NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant ALTER NAMED COLLECTION" in node.query_and_get_error( "ALTER NAMED COLLECTION collection2 SET key1=3", user="kek" ) ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant DROP NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant DROP NAMED COLLECTION" in node.query_and_get_error("DROP NAMED COLLECTION collection2", user="kek") ) node.query("GRANT drop named collection ON collection2 TO kek") diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference b/tests/integration/test_named_collections_if_exists_on_cluster/__init__.py similarity index 100% rename from tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference rename to tests/integration/test_named_collections_if_exists_on_cluster/__init__.py diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml new file mode 100644 index 00000000000..95336350c1e --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml @@ -0,0 +1,22 @@ + + + + + true + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + true + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml new file mode 100644 index 00000000000..15da914f666 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml @@ -0,0 +1,12 @@ + + + + + default + default + 1 + 1 + 1 + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/test.py b/tests/integration/test_named_collections_if_exists_on_cluster/test.py new file mode 100644 index 00000000000..5f5be9156b9 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/test.py @@ -0,0 +1,148 @@ +""" +Test cases: + +--- execute on the first node +create named collection foobar as a=1, b=2; +create named collection if not exists foobar on cluster '{cluster}' as a=1, b=2, c=3; +create named collection collection_present_on_first_node as a=1, b=2, s='string', x=0, y=-1; + +--- execute on any other node +alter named collection foobar on cluster '{cluster}' set a=2, c=3; +alter named collection foobar on cluster '{cluster}' delete b; +alter named collection foobar on cluster '{cluster}' set a=3 delete c; +alter named collection if exists collection_absent_ewerywhere on cluster '{cluster}' delete b; +alter named collection if exists collection_present_on_first_node on cluster '{cluster}' delete b; + +--- execute on every node +select * from system.named_collections; + +--- execute on any node +drop named collection foobar on cluster '{cluster}'; +drop named collection if exists collection_absent_ewerywhere on cluster '{cluster}'; +drop named collection if exists collection_present_on_first_node on cluster '{cluster}'; + +--- execute on every node +select * from system.named_collections; +""" + +import logging +from json import dumps, loads +from functools import partial +import pytest +from helpers.cluster import ClickHouseCluster + +dumps = partial(dumps, ensure_ascii=False) + +NODE01, NODE02, NODE03 = "clickhouse1", "clickhouse2", "clickhouse3" + +CHECK_STRING_VALUE = "Some ~`$tr!ng-_+=123@#%^&&()|?[]{}<🤡>.,\t\n:;" + +STMT_CREATE = "CREATE NAMED COLLECTION" +STMT_ALTER = "ALTER NAMED COLLECTION" +STMT_DROP = "DROP NAMED COLLECTION" + +SYSTEM_TABLE = "system.named_collections" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + common_kwargs = dict( + main_configs=[ + "configs/config.d/cluster.xml", + ], + user_configs=[ + "configs/users.d/default.xml", + ], + with_zookeeper=True, + stay_alive=True, + ) + for name in [NODE01, NODE02, NODE03]: + cluster.add_instance(name, **common_kwargs) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_create_alter_drop_on_cluster(cluster): + """ + Executes the set of queries and checks the final named collections state. + """ + q_count_collections = f"select count() from {SYSTEM_TABLE}" + + def check_collections_empty(): + for name, node in list(cluster.instances.items()): + assert ( + "0" == node.query(q_count_collections).strip() + ), f"{SYSTEM_TABLE} is not empty on {name}" + + foobar_final_state = {"name": "foobar", "collection": {"a": "3"}} + collection_present_on_first_node_final_state = { + "name": "collection_present_on_first_node", + "collection": {"a": "1", "s": CHECK_STRING_VALUE, "x": "0", "y": "-1"}, + } + expected_state = { + NODE01: [foobar_final_state, collection_present_on_first_node_final_state], + NODE02: [foobar_final_state], + NODE03: [foobar_final_state], + } + + q_get_collections = f"select * from {SYSTEM_TABLE} order by name desc format JSON" + + def check_state(): + for name, node in list(cluster.instances.items()): + result = loads(node.query(q_get_collections))["data"] + logging.debug("%s ?= %s", dumps(result), dumps(expected_state[name])) + assert ( + expected_state[name] == result + ), f"invalid {SYSTEM_TABLE} content on {name}: {result}" + + check_collections_empty() + + # create executed on the first node + node = cluster.instances[NODE01] + node.query(f"{STMT_CREATE} foobar AS a=1, b=2") + node.query( + f"{STMT_CREATE} IF NOT EXISTS foobar ON CLUSTER 'cluster' AS a=1, b=2, c=3" + ) + node.query( + f"{STMT_CREATE} collection_present_on_first_node AS a=1, b=2, s='{CHECK_STRING_VALUE}', x=0, y=-1" + ) + + # alter executed on the second node + node = cluster.instances[NODE02] + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=2, c=3") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' DELETE b") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=3 DELETE c") + node.query( + f"{STMT_ALTER} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster' DELETE b" + ) + node.query( + f"{STMT_ALTER} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster' DELETE b" + ) + + check_state() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_state() + + # drop executed on the third node + node = cluster.instances[NODE03] + node.query(f"{STMT_DROP} foobar ON CLUSTER 'cluster'") + node.query( + f"{STMT_DROP} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster'" + ) + node.query( + f"{STMT_DROP} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster'" + ) + + check_collections_empty() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_collections_empty() diff --git a/tests/integration/test_old_versions/test.py b/tests/integration/test_old_versions/test.py index beef294b792..aff07c53114 100644 --- a/tests/integration/test_old_versions/test.py +++ b/tests/integration/test_old_versions/test.py @@ -10,6 +10,7 @@ node18_14 = cluster.add_instance( tag="18.14.19", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_1 = cluster.add_instance( "node19_1", @@ -17,6 +18,7 @@ node19_1 = cluster.add_instance( tag="19.1.16", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_4 = cluster.add_instance( "node19_4", @@ -24,6 +26,7 @@ node19_4 = cluster.add_instance( tag="19.4.5.35", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_8 = cluster.add_instance( "node19_8", @@ -31,6 +34,7 @@ node19_8 = cluster.add_instance( tag="19.8.3.8", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_11 = cluster.add_instance( "node19_11", @@ -38,6 +42,7 @@ node19_11 = cluster.add_instance( tag="19.11.13.74", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_13 = cluster.add_instance( "node19_13", @@ -45,6 +50,7 @@ node19_13 = cluster.add_instance( tag="19.13.7.57", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_16 = cluster.add_instance( "node19_16", @@ -52,6 +58,7 @@ node19_16 = cluster.add_instance( tag="19.16.2.2", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) old_nodes = [node18_14, node19_1, node19_4, node19_8, node19_11, node19_13, node19_16] new_node = cluster.add_instance("node_new") diff --git a/tests/integration/test_parallel_replicas_over_distributed/__init__.py b/tests/integration/test_parallel_replicas_over_distributed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_over_distributed/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_over_distributed/configs/remote_servers.xml new file mode 100644 index 00000000000..6d7a45365f7 --- /dev/null +++ b/tests/integration/test_parallel_replicas_over_distributed/configs/remote_servers.xml @@ -0,0 +1,58 @@ + + + + + true + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + + true + + n4 + 9000 + + + n5 + 9000 + + + n6 + 9000 + + + + + + true + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + n4 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py new file mode 100644 index 00000000000..6525901d366 --- /dev/null +++ b/tests/integration/test_parallel_replicas_over_distributed/test.py @@ -0,0 +1,154 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +nodes = [ + cluster.add_instance( + f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + ) + for i in (1, 2, 3, 4, 5, 6) +] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name): + # create replicated tables + for node in nodes: + node.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + if cluster == "test_single_shard_multiple_replicas": + nodes[0].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + nodes[1].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" + ) + nodes[2].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + nodes[3].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r4') ORDER BY (key)" + ) + elif cluster == "test_multiple_shards_multiple_replicas": + # shard 1 + nodes[0].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + nodes[1].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" + ) + nodes[2].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + # shard 2 + nodes[3].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard2/{table_name}', 'r1') ORDER BY (key)" + ) + nodes[4].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard2/{table_name}', 'r2') ORDER BY (key)" + ) + nodes[5].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard2/{table_name}', 'r3') ORDER BY (key)" + ) + else: + raise Exception(f"Unexpected cluster: {cluster}") + + # create distributed table + nodes[0].query(f"DROP TABLE IF EXISTS {table_name}_d SYNC") + nodes[0].query( + f""" + CREATE TABLE {table_name}_d AS {table_name} + Engine=Distributed( + {cluster}, + currentDatabase(), + {table_name}, + key + ) + """ + ) + + # populate data + nodes[0].query( + f"INSERT INTO {table_name}_d SELECT number, number FROM numbers(1000)", + settings={"insert_distributed_sync": 1}, + ) + nodes[0].query( + f"INSERT INTO {table_name}_d SELECT number, number FROM numbers(2000)", + settings={"insert_distributed_sync": 1}, + ) + nodes[0].query( + f"INSERT INTO {table_name}_d SELECT -number, -number FROM numbers(1000)", + settings={"insert_distributed_sync": 1}, + ) + nodes[0].query( + f"INSERT INTO {table_name}_d SELECT -number, -number FROM numbers(2000)", + settings={"insert_distributed_sync": 1}, + ) + nodes[0].query( + f"INSERT INTO {table_name}_d SELECT number, number FROM numbers(3)", + settings={"insert_distributed_sync": 1}, + ) + + +@pytest.mark.parametrize( + "cluster,max_parallel_replicas,prefer_localhost_replica", + [ + # prefer_localhost_replica=0 + pytest.param("test_single_shard_multiple_replicas", 2, 0), + pytest.param("test_single_shard_multiple_replicas", 3, 0), + pytest.param("test_single_shard_multiple_replicas", 4, 0), + pytest.param("test_single_shard_multiple_replicas", 10, 0), + # prefer_localhost_replica=1 + pytest.param("test_single_shard_multiple_replicas", 2, 1), + pytest.param("test_single_shard_multiple_replicas", 3, 1), + pytest.param("test_single_shard_multiple_replicas", 4, 1), + pytest.param("test_single_shard_multiple_replicas", 10, 1), + # prefer_localhost_replica=0 + pytest.param("test_multiple_shards_multiple_replicas", 2, 0), + pytest.param("test_multiple_shards_multiple_replicas", 3, 0), + pytest.param("test_multiple_shards_multiple_replicas", 4, 0), + pytest.param("test_multiple_shards_multiple_replicas", 10, 0), + # prefer_localhost_replica=1 + pytest.param("test_multiple_shards_multiple_replicas", 2, 1), + pytest.param("test_multiple_shards_multiple_replicas", 3, 1), + pytest.param("test_multiple_shards_multiple_replicas", 4, 1), + pytest.param("test_multiple_shards_multiple_replicas", 10, 1), + ], +) +def test_parallel_replicas_over_distributed( + start_cluster, cluster, max_parallel_replicas, prefer_localhost_replica +): + table_name = "test_table" + create_tables(cluster, table_name) + + node = nodes[0] + expected_result = f"6003\t-1999\t1999\t3\n" + + # w/o parallel replicas + assert ( + node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d") + == expected_result + ) + + # parallel replicas + assert ( + node.query( + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d", + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "prefer_localhost_replica": prefer_localhost_replica, + "max_parallel_replicas": max_parallel_replicas, + "use_hedged_requests": 0, + }, + ) + == expected_result + ) diff --git a/tests/integration/test_part_log_table/test.py b/tests/integration/test_part_log_table/test.py index ea4499d5877..15d65cc31dd 100644 --- a/tests/integration/test_part_log_table/test.py +++ b/tests/integration/test_part_log_table/test.py @@ -56,6 +56,23 @@ def test_config_with_standard_part_log(start_cluster): assert node2.query("SELECT * FROM system.part_log") != "" +def test_part_log_contains_partition(start_cluster): + node2.query( + "CREATE TABLE test_partition_table (date Date, word String, value UInt64) ENGINE=MergeTree() " + + "PARTITION BY toYYYYMM(date) Order by value" + ) + node2.query( + "INSERT INTO test_partition_table VALUES " + + "('2023-06-20', 'a', 10), ('2023-06-21', 'b', 11)," + + "('2023-05-20', 'cc', 14),('2023-05-21', 'd1', 15);" + ) + node2.query("SYSTEM FLUSH LOGS") + resp = node2.query( + "SELECT partition from system.part_log where table = 'test_partition_table'" + ) + assert resp == "202306\n202305\n" + + def test_config_with_non_standard_part_log(start_cluster): node3.query( "CREATE TABLE test_table(word String, value UInt64) ENGINE=MergeTree() Order by value" diff --git a/tests/integration/test_peak_memory_usage/__init__.py b/tests/integration/test_peak_memory_usage/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_peak_memory_usage/configs/remote_servers.xml b/tests/integration/test_peak_memory_usage/configs/remote_servers.xml new file mode 100644 index 00000000000..b137758489e --- /dev/null +++ b/tests/integration/test_peak_memory_usage/configs/remote_servers.xml @@ -0,0 +1,20 @@ + + + + + 1 + + shard_1 + 9000 + + + + 3 + + shard_2 + 9000 + + + + + diff --git a/tests/integration/test_peak_memory_usage/test.py b/tests/integration/test_peak_memory_usage/test.py new file mode 100644 index 00000000000..a1313461482 --- /dev/null +++ b/tests/integration/test_peak_memory_usage/test.py @@ -0,0 +1,122 @@ +import pytest +import tempfile +import re + +from helpers.cluster import ClickHouseCluster +from helpers.uclient import client, prompt + +cluster = ClickHouseCluster(__file__) + +shard_1 = cluster.add_instance( + "shard_1", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={ + "shard": "shard_1", + }, +) +shard_2 = cluster.add_instance( + "shard_2", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={ + "shard": "shard_2", + }, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + shard_1.query( + "CREATE TABLE fixed_numbers ON CLUSTER 'cluster' (" + "number UInt64" + ") ENGINE=MergeTree()" + "ORDER BY number" + ) + + shard_1.query( + "CREATE TABLE fixed_numbers_2 ON CLUSTER 'cluster' (" + "number UInt64" + ") ENGINE=Memory ()" + ) + + shard_1.query( + "CREATE TABLE distributed_fixed_numbers (number UInt64) ENGINE=Distributed('cluster', 'default', 'fixed_numbers')" + ) + shard_1.query("INSERT INTO fixed_numbers SELECT number FROM numbers(0, 100)") + + shard_2.query("INSERT INTO fixed_numbers SELECT number FROM numbers(100, 200)") + + shard_1.query("INSERT INTO fixed_numbers_2 SELECT number FROM numbers(0, 10)") + + shard_2.query( + "INSERT INTO fixed_numbers_2 SELECT number FROM numbers(0, 120000)" + ) + + yield cluster + finally: + cluster.shutdown() + + +def get_memory_usage_from_client_output_and_close(client_output): + client_output.seek(0) + peek_memory_usage_str_found = False + for line in client_output: + print(f"'{line}'\n") + if not peek_memory_usage_str_found: + peek_memory_usage_str_found = "Peak memory usage" in line + + if peek_memory_usage_str_found: + search_obj = re.search(r"[+-]?[0-9]+\.[0-9]+", line) + if search_obj: + client_output.close() + print(f"peak_memory_usage {search_obj.group()}") + return search_obj.group() + + print(f"peak_memory_usage not found") + client_output.close() + return "" + + +def test_clickhouse_client_max_peak_memory_usage_distributed(started_cluster): + client_output = tempfile.TemporaryFile(mode="w+t") + command_text = ( + f"{started_cluster.get_client_cmd()} --host {shard_1.ip_address} --port 9000" + ) + with client(name="client1>", log=client_output, command=command_text) as client1: + client1.expect(prompt) + client1.send( + "SELECT COUNT(*) FROM distributed_fixed_numbers JOIN fixed_numbers_2 ON distributed_fixed_numbers.number=fixed_numbers_2.number", + ) + client1.expect("Peak memory usage", timeout=60) + client1.expect(prompt) + + peak_memory_usage = get_memory_usage_from_client_output_and_close(client_output) + assert peak_memory_usage + assert shard_2.contains_in_log( + f"Peak memory usage (for query): {peak_memory_usage}" + ) + + +def test_clickhouse_client_max_peak_memory_single_node(started_cluster): + client_output = tempfile.TemporaryFile(mode="w+t") + + command_text = ( + f"{started_cluster.get_client_cmd()} --host {shard_1.ip_address} --port 9000" + ) + with client(name="client1>", log=client_output, command=command_text) as client1: + client1.expect(prompt) + client1.send( + "SELECT COUNT(*) FROM (SELECT number FROM numbers(1,300000) INTERSECT SELECT number FROM numbers(10000,1200000))" + ) + client1.expect("Peak memory usage", timeout=60) + client1.expect(prompt) + + peak_memory_usage = get_memory_usage_from_client_output_and_close(client_output) + assert peak_memory_usage + assert shard_1.contains_in_log( + f"Peak memory usage (for query): {peak_memory_usage}" + ) diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index c5859146fe9..debb509de90 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -363,6 +363,7 @@ node7 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node8 = cluster.add_instance( "node8", @@ -413,54 +414,6 @@ def start_cluster_diff_versions(): cluster.shutdown() -@pytest.mark.skip(reason="compatability is temporary broken") -def test_polymorphic_parts_diff_versions(start_cluster_diff_versions): - # Check that replication with Wide parts works between different versions. - - node_old = node7 - node_new = node8 - - insert_random_data("polymorphic_table", node7, 100) - node8.query("SYSTEM SYNC REPLICA polymorphic_table", timeout=20) - - assert node8.query("SELECT count() FROM polymorphic_table") == "100\n" - assert ( - node8.query( - "SELECT DISTINCT part_type FROM system.parts WHERE table = 'polymorphic_table' and active" - ) - == "Wide\n" - ) - - -@pytest.mark.skip(reason="compatability is temporary broken") -def test_polymorphic_parts_diff_versions_2(start_cluster_diff_versions): - # Replication doesn't work on old version if part is created in compact format, because - # this version doesn't know anything about it. It's considered to be ok. - - node_old = node7 - node_new = node8 - - insert_random_data("polymorphic_table_2", node_new, 100) - - assert node_new.query("SELECT count() FROM polymorphic_table_2") == "100\n" - assert node_old.query("SELECT count() FROM polymorphic_table_2") == "0\n" - with pytest.raises(Exception): - node_old.query("SYSTEM SYNC REPLICA polymorphic_table_2", timeout=3) - - node_old.restart_with_latest_version(fix_metadata=True) - - node_old.query("SYSTEM SYNC REPLICA polymorphic_table_2", timeout=20) - - # Works after update - assert node_old.query("SELECT count() FROM polymorphic_table_2") == "100\n" - assert ( - node_old.query( - "SELECT DISTINCT part_type FROM system.parts WHERE table = 'polymorphic_table_2' and active" - ) - == "Compact\n" - ) - - def test_polymorphic_parts_non_adaptive(start_cluster): node1.query("SYSTEM STOP MERGES") node2.query("SYSTEM STOP MERGES") diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 59a464f9020..bdf1056765e 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -400,6 +400,39 @@ def test_datetime(started_cluster): assert "DateTime64(6)" in node1.query("show create table pg.test") +def test_postgresql_password_leak(started_cluster): + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) + cursor = conn.cursor() + + cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE") + cursor.execute("CREATE SCHEMA test_schema") + cursor.execute("CREATE TABLE test_schema.table1 (a integer)") + cursor.execute("CREATE TABLE table2 (a integer)") + + node1.query("DROP DATABASE IF EXISTS postgres_database") + node1.query( + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')" + ) + + node1.query("DROP DATABASE IF EXISTS postgres_database2") + node1.query( + "CREATE DATABASE postgres_database2 ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" + ) + + assert "mysecretpassword" not in node1.query("SHOW CREATE postgres_database.table1") + assert "mysecretpassword" not in node1.query( + "SHOW CREATE postgres_database2.table2" + ) + + node1.query("DROP DATABASE postgres_database") + node1.query("DROP DATABASE postgres_database2") + + cursor.execute("DROP SCHEMA test_schema CASCADE") + cursor.execute("DROP TABLE table2") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_postgresql_protocol/test.py b/tests/integration/test_postgresql_protocol/test.py index de01bba6862..9da9ec4d0b7 100644 --- a/tests/integration/test_postgresql_protocol/test.py +++ b/tests/integration/test_postgresql_protocol/test.py @@ -40,7 +40,10 @@ server_port = 5433 def started_cluster(): try: cluster.start() - + # Wait for the PostgreSQL handler to start. + # Cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + cluster.instances["node"].wait_for_log_line("PostgreSQL compatibility protocol") yield cluster except Exception as ex: logging.exception(ex) diff --git a/tests/integration/test_profile_max_sessions_for_user/__init__.py b/tests/integration/test_profile_max_sessions_for_user/__init__.py new file mode 100755 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem new file mode 100755 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 +Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb +9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/log.xml b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml new file mode 100644 index 00000000000..22f95a8bd5d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml new file mode 100644 index 00000000000..3123c4a3d9c --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.crt b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt new file mode 100755 index 00000000000..070d37f3b77 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJANhP897Se2gmMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0yMDA0MTgyMTE2NDBaFw0yMTA0MTgyMTE2NDBaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM92kcojQoMsjZ9YGhPMY6h/fDUsZeSKHLxgqE6wbmfU1oZKCPWqnvl+4n0J +pnT5h1ETxxYZLepimKq0DEVPUTmCl0xmcKbtUNiaTUKYKsdita6b2vZCX9wUPN9p +2Kjnm41l+aZNqIEBhIgHNWg9qowi20y0EIXR79jQLwwaInHAaJLZxVsqY2zjQ/D7 +1Zh82MXud7iqxBQiEfw9Cz35UFA239R8QTlPkVQfsN1gfLxnLk24QUX3o+hbUI1g +nlSpyYDHYQlOmwz8doDs6THHAZNJ4bPE9xHNFpw6dGZdbtH+IKQ/qRZIiOaiNuzJ +IOHl6XQDRDkW2LMTiCQ6fjC7Pz8CAwEAAaNQME4wHQYDVR0OBBYEFFvhaA/Eguyf +BXkMj8BkNLBqMnz2MB8GA1UdIwQYMBaAFFvhaA/EguyfBXkMj8BkNLBqMnz2MAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACeU/oL48eVAKH7NQntHhRaJ +ZGeQzKIjrSBjFo8BGXD1nJZhUeFsylLrhCkC8/5/3grE3BNVX9bxcGjO81C9Mn4U +t0z13d6ovJjCZSQArtLwgeJGlpH7gNdD3DyT8DQmrqYVnmnB7UmBu45XH1LWGQZr +FAOhGRVs6s6mNj8QlLMgdmsOeOQnsGCMdoss8zV9vO2dc4A5SDSSL2mqGGY4Yjtt +X+XlEhXXnksGyx8NGVOZX4wcj8WeCAj/lihQ7Zh6XYwZH9i+E46ompUwoziZnNPu +2RH63tLNCxkOY2HF5VMlbMmzer3FkhlM6TAZZRPcvSphKPwXK4A33yqc6wnWvpc= +-----END CERTIFICATE----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.key b/tests/integration/test_profile_max_sessions_for_user/configs/server.key new file mode 100755 index 00000000000..b3dee82dcda --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDPdpHKI0KDLI2f +WBoTzGOof3w1LGXkihy8YKhOsG5n1NaGSgj1qp75fuJ9CaZ0+YdRE8cWGS3qYpiq +tAxFT1E5gpdMZnCm7VDYmk1CmCrHYrWum9r2Ql/cFDzfadio55uNZfmmTaiBAYSI +BzVoPaqMIttMtBCF0e/Y0C8MGiJxwGiS2cVbKmNs40Pw+9WYfNjF7ne4qsQUIhH8 +PQs9+VBQNt/UfEE5T5FUH7DdYHy8Zy5NuEFF96PoW1CNYJ5UqcmAx2EJTpsM/HaA +7OkxxwGTSeGzxPcRzRacOnRmXW7R/iCkP6kWSIjmojbsySDh5el0A0Q5FtizE4gk +On4wuz8/AgMBAAECggEAJ54J2yL+mZQRe2NUn4FBarTloDXZQ1pIgISov1Ybz0Iq +sTxEF728XAKp95y3J9Fa0NXJB+RJC2BGrRpy2W17IlNY1yMc0hOxg5t7s4LhcG/e +J/jlSG+GZL2MnlFVKXQJFWhq0yIzUmdayqstvLlB7z7cx/n+yb88YRfoVBRNjZEL +Tdrsw+087igDjrIxZJ3eMN5Wi434n9s4yAoRQC1bP5wcWx0gD4MzdmL8ip6suiRc +LRuBAhV/Op812xlxUhrF5dInUM9OLlGTXpUzexAS8Cyy7S4bfkW2BaCxTF7I7TFw +Whx28CKn/G49tIuU0m6AlxWbXpLVePTFyMb7RJz5cQKBgQD7VQd2u3HM6eE3PcXD +p6ObdLTUk8OAJ5BMmADFc71W0Epyo26/e8KXKGYGxE2W3fr13y+9b0fl5fxZPuhS +MgvXEO7rItAVsLcp0IzaqY0WUee2b4XWPAU0XuPqvjYMpx8H5OEHqFK6lhZysAqM +X7Ot3/Hux9X0MC4v5a/HNbDUOQKBgQDTUPaP3ADRrmpmE2sWuzWEnCSEz5f0tCLO +wTqhV/UraWUNlAbgK5NB790IjH/gotBSqqNPLJwJh0LUfClKM4LiaHsEag0OArOF +GhPMK1Ohps8c2RRsiG8+hxX2HEHeAVbkouEDPDiHdIW/92pBViDoETXL6qxDKbm9 +LkOcVeDfNwKBgQChh1xsqrvQ/t+IKWNZA/zahH9TwEP9sW/ESkz0mhYuHWA7nV4o +ItpFW+l2n+Nd+vy32OFN1p9W2iD9GrklWpTRfEiRRqaFyjVt4mMkhaPvnGRXlAVo +Utrldbb1v5ntN9txr2ARE9VXpe53dzzQSxGnxi4vUK/paK3GitAWMCOdwQKBgQCi +hmGsUXQb0P6qVYMGr6PAw2re7t8baLRguoMCdqjs45nCMLh9D2apzvb8TTtJJU/+ +VJlYGqJEPdDrpjcHh8jBo8QBqCM0RGWYGG9jl2syKB6hPGCV/PU6bSE58Y/DVNpk +7NUM7PM5UyhPddY2PC0A78Ole29UFLJzSzLa+b4DTwKBgH9Wh2k4YPnPcRrX89UL +eSwWa1CGq6HWX8Kd5qyz256aeHWuG5nv15+rBt+D7nwajUsqeVkAXz5H/dHuG1xz +jb7RW+pEjx0GVAmIbkM9vOLqEUfHHHPuk4AXCGGZ5sarPiKg4BHKBBsY1dpoO5UH +0j71fRA6zurHnTXDaCLWlUpZ +-----END PRIVATE KEY----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml new file mode 100644 index 00000000000..778d327c460 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml @@ -0,0 +1,17 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/users.xml b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml new file mode 100644 index 00000000000..3bed673b2ca --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml @@ -0,0 +1,16 @@ + + + + 2 + 0 + + + + + + + + 123 + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py new file mode 100755 index 00000000000..38f77b9d375 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -0,0 +1,246 @@ +import os + +import grpc +import pymysql.connections +import psycopg2 as py_psql +import pytest +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.test_tools import assert_logs_contain_with_retry + +from helpers.uclient import client, prompt + +MAX_SESSIONS_FOR_USER = 2 +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 + +TEST_USER = "test_user" +TEST_PASSWORD = "123" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/ssl_conf.xml", + "configs/dhparam.pem", + "configs/server.crt", + "configs/server.key", + ], + user_configs=["configs/users.xml"], + env_variables={ + "UBSAN_OPTIONS": "print_stacktrace=1", + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + "TSAN_OPTIONS": "report_atomic_races=0 " + + os.getenv("TSAN_OPTIONS", default=""), + }, +) + + +def get_query(name, id): + return f"SElECT '{name}', {id}, number from system.numbers" + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +def grpc_query(query_text, channel, session_id_): + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query_text, + session_id=session_id_, + user_name=TEST_USER, + password=TEST_PASSWORD, + ) + + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + return result.output.decode(DEFAULT_ENCODING) + + +def threaded_run_test(sessions): + instance.rotate_logs() + thread_list = [] + for i in range(len(sessions)): + thread = ThreadWithException(target=sessions[i], args=(i,)) + thread_list.append(thread) + thread.start() + + if len(sessions) > MAX_SESSIONS_FOR_USER: + assert_logs_contain_with_retry(instance, "overflown session count") + + instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") + + for thread in thread_list: + thread.join() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + # Wait for the PostgreSQL handler to start. + # Cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + instance.wait_for_log_line("PostgreSQL compatibility protocol") + yield cluster + finally: + cluster.shutdown() + + +class ThreadWithException(threading.Thread): + def run(self): + try: + super().run() + except: + pass + + +def postgres_session(id): + ch = py_psql.connect( + host=instance.ip_address, + port=POSTGRES_SERVER_PORT, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + ) + cur = ch.cursor() + cur.execute(get_query("postgres_session", id)) + cur.fetchall() + + +def mysql_session(id): + client = pymysql.connections.Connection( + host=instance.ip_address, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + cursor.execute(get_query("mysql_session", id)) + cursor.fetchall() + + +def tcp_session(id): + instance.query(get_query("tcp_session", id), user=TEST_USER, password=TEST_PASSWORD) + + +def http_session(id): + instance.http_query( + get_query("http_session", id), user=TEST_USER, password=TEST_PASSWORD + ) + + +def http_named_session(id): + instance.http_query( + get_query("http_named_session", id), + user=TEST_USER, + password=TEST_PASSWORD, + params={"session_id": id}, + ) + + +def grpc_session(id): + grpc_query( + get_query("grpc_session", id), grpc_create_insecure_channel(), f"session_{id}" + ) + + +def test_profile_max_sessions_for_user_tcp(started_cluster): + threaded_run_test([tcp_session] * 3) + + +def test_profile_max_sessions_for_user_postgres(started_cluster): + threaded_run_test([postgres_session] * 3) + + +def test_profile_max_sessions_for_user_mysql(started_cluster): + threaded_run_test([mysql_session] * 3) + + +def test_profile_max_sessions_for_user_http(started_cluster): + threaded_run_test([http_session] * 3) + + +def test_profile_max_sessions_for_user_http_named_session(started_cluster): + threaded_run_test([http_named_session] * 3) + + +def test_profile_max_sessions_for_user_grpc(started_cluster): + threaded_run_test([grpc_session] * 3) + + +def test_profile_max_sessions_for_user_tcp_and_others(started_cluster): + threaded_run_test([tcp_session, grpc_session, grpc_session]) + threaded_run_test([tcp_session, http_session, http_session]) + threaded_run_test([tcp_session, mysql_session, mysql_session]) + threaded_run_test([tcp_session, postgres_session, postgres_session]) + threaded_run_test([tcp_session, http_session, postgres_session]) + threaded_run_test([tcp_session, postgres_session, http_session]) + + +def test_profile_max_sessions_for_user_setting_in_query(started_cluster): + instance.query_and_get_error("SET max_sessions_for_user = 10") + + +def test_profile_max_sessions_for_user_client_suggestions_connection(started_cluster): + command_text = f"{started_cluster.get_client_cmd()} --host {instance.ip_address} --port 9000 -u {TEST_USER} --password {TEST_PASSWORD}" + command_text_without_suggestions = command_text + " --disable_suggestion" + + # Launch client1 without suggestions to avoid a race condition: + # Client1 opens a session. + # Client1 opens a session for suggestion connection. + # Client2 fails to open a session and gets the USER_SESSION_LIMIT_EXCEEDED error. + # + # Expected order: + # Client1 opens a session. + # Client2 opens a session. + # Client2 fails to open a session for suggestions and with USER_SESSION_LIMIT_EXCEEDED (No error printed). + # Client3 fails to open a session. + # Client1 executes the query. + # Client2 loads suggestions from the server using the main connection and executes a query. + with client( + name="client1>", log=None, command=command_text_without_suggestions + ) as client1: + client1.expect(prompt) + with client(name="client2>", log=None, command=command_text) as client2: + client2.expect(prompt) + with client(name="client3>", log=None, command=command_text) as client3: + client3.expect("USER_SESSION_LIMIT_EXCEEDED") + + client1.send("SELECT 'CLIENT_1_SELECT' FORMAT CSV") + client1.expect("CLIENT_1_SELECT") + client1.expect(prompt) + client2.send("SELECT 'CLIENT_2_SELECT' FORMAT CSV") + client2.expect("CLIENT_2_SELECT") + client2.expect(prompt) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 4dbd530dd17..1276a6079f0 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -147,12 +147,16 @@ def test_drop_replica_and_achieve_quorum(started_cluster): @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + table_name = ( + "test_quorum_insert_with_drop_partition_new_data" + if add_new_data + else "test_quorum_insert_with_drop_partition" ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") create_query = ( - "CREATE TABLE test_quorum_insert_with_drop_partition ON CLUSTER cluster " + f"CREATE TABLE {table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -161,78 +165,74 @@ def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): print("Create Replicated table with three replicas") zero.query(create_query) - print("Stop fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM STOP FETCHES test_quorum_insert_with_drop_partition") + print(f"Stop fetches for {table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {table_name}") print("Insert with quorum. (zero and second)") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") - zero.query( - "ALTER TABLE test_quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'" - ) + zero.query(f"ALTER TABLE {table_name} DROP PARTITION '2011-01-01'") if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(2, '2011-01-01')") - print("Resume fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM START FETCHES test_quorum_insert_with_drop_partition") + print(f"Resume fetches for {table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_quorum_insert_with_drop_partition") + first.query(f"SYSTEM SYNC REPLICA {table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_quorum_insert_with_drop_partition') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {table_name}") + print("Select from updated partition.") if add_new_data: + assert TSV("2\t2011-01-01\n") == TSV(zero.query(f"SELECT * FROM {table_name}")) assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") + second.query(f"SELECT * FROM {table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_move_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + source_table_name = ( + "test_insert_quorum_with_move_partition_source_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_source" ) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" + destination_table_name = ( + "test_insert_quorum_with_move_partition_destination_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_destination" ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") create_source = ( - "CREATE TABLE test_insert_quorum_with_move_partition_source ON CLUSTER cluster " + f"CREATE TABLE {source_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " ) create_destination = ( - "CREATE TABLE test_insert_quorum_with_move_partition_destination ON CLUSTER cluster " + f"CREATE TABLE {destination_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -244,65 +244,52 @@ def test_insert_quorum_with_move_partition(started_cluster, add_new_data): print("Create destination Replicated table with three replicas") zero.query(create_destination) - print( - "Stop fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM STOP FETCHES test_insert_quorum_with_move_partition_source") + print(f"Stop fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {source_table_name}") print("Insert with quorum. (zero and second)") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") zero.query( - "ALTER TABLE test_insert_quorum_with_move_partition_source MOVE PARTITION '2011-01-01' TO TABLE test_insert_quorum_with_move_partition_destination" + f"ALTER TABLE {source_table_name} MOVE PARTITION '2011-01-01' TO TABLE {destination_table_name}" ) if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(2, '2011-01-01')") - print( - "Resume fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM START FETCHES test_insert_quorum_with_move_partition_source") + print(f"Resume fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {source_table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_move_partition_source") + first.query(f"SYSTEM SYNC REPLICA {source_table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_insert_quorum_with_move_partition_source') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{source_table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {source_table_name}") + print("Select from updated partition.") if add_new_data: assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + zero.query(f"SELECT * FROM {source_table_name}") ) assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + second.query(f"SELECT * FROM {source_table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {source_table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {source_table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" - ) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") def test_insert_quorum_with_ttl(started_cluster): diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4f98b9a0d0d..cec14b0af73 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -105,7 +105,7 @@ def test_quota_from_users_xml(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -252,7 +252,7 @@ def test_simpliest_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -326,7 +326,7 @@ def test_tracking_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -446,7 +446,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -523,7 +523,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -613,7 +613,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -675,7 +675,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952,63113904]", 0, @@ -824,7 +824,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -914,7 +914,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -986,7 +986,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1048,7 +1048,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1111,7 +1111,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1121,7 +1121,7 @@ def test_add_remove_quota(): [ "myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", - "users.xml", + "users_xml", "['client_key','user_name']", "[3600,2629746]", 0, @@ -1214,7 +1214,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1283,7 +1283,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1346,7 +1346,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1385,7 +1385,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", ["user_name"], "[31556952]", 0, @@ -1554,7 +1554,7 @@ def test_query_inserts(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml index 16cd942e975..e598cc28d5d 100644 --- a/tests/integration/test_replicated_database/configs/config.xml +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -1,5 +1,6 @@ 10 + 1 10 diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml index 5666ffeace8..7c0e60a044e 100644 --- a/tests/integration/test_replicated_database/configs/settings.xml +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -4,6 +4,8 @@ 1 1 1 + 0 + 0 diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index ed034a326da..f45841124d9 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -672,7 +672,11 @@ def test_alters_from_different_replicas(started_cluster): def create_some_tables(db): - settings = {"distributed_ddl_task_timeout": 0} + settings = { + "distributed_ddl_task_timeout": 0, + "allow_experimental_object_type": 1, + "allow_suspicious_codecs": 1, + } main_node.query(f"CREATE TABLE {db}.t1 (n int) ENGINE=Memory", settings=settings) dummy_node.query( f"CREATE TABLE {db}.t2 (s String) ENGINE=Memory", settings=settings @@ -690,11 +694,11 @@ def create_some_tables(db): settings=settings, ) dummy_node.query( - f"CREATE TABLE {db}.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt2 (n int CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12))) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) main_node.query( - f"CREATE TABLE {db}.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt3 (n int, json Object('json') materialized '') ENGINE=ReplicatedMergeTree order by n", settings=settings, ) dummy_node.query( @@ -868,7 +872,10 @@ def test_recover_staled_replica(started_cluster): ]: assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" for table in ["t2", "rmt1", "rmt2", "rmt4", "d1", "d2", "mt2", "mv1", "mv3"]: - assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" + assert ( + dummy_node.query(f"SELECT '{table}', (*,).1 FROM recover.{table}") + == f"{table}\t42\n" + ) for table in ["m1", "mt1"]: assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n" global test_recover_staled_replica_run @@ -1219,7 +1226,7 @@ def test_force_synchronous_settings(started_cluster): def select_func(): dummy_node.query( - "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t" + "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t SETTINGS function_sleep_max_microseconds_per_block = 0" ) select_thread = threading.Thread(target=select_func) @@ -1255,7 +1262,7 @@ def test_recover_digest_mismatch(started_cluster): "mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql", "sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql", "rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql", - # f"rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Directory already exists + "rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Will trigger "Directory already exists" "rm -rf /var/lib/clickhouse/store", ] diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index 68f2776e955..c30a0d86c98 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -9,6 +9,7 @@ node1 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -17,6 +18,7 @@ node2 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml deleted file mode 100644 index bb4aba94e0b..00000000000 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - hdfs - hdfs://hdfs1:9000/clickhouse1/ - - true - - - hdfs - hdfs://hdfs1:9000/clickhouse1/ - - true - - - hdfs - hdfs://hdfs1:9000/clickhouse2/ - - true - - - - - -
- hdfs1 -
-
-
- - -
- default -
- - hdfs1 - -
- 0.0 -
- - -
- hdfs2 -
- - hdfs1 - -
-
- - -
- hdfs1_again -
- - hdfs1 - -
-
-
-
- - - 1024000 - 1 - true - - - - - - - node1 - 9000 - - - - - node2 - 9000 - - - - - - - test_cluster - 1 - - true -
diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py deleted file mode 100644 index eb3d62eb718..00000000000 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ /dev/null @@ -1,322 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/42561 - -pytestmark = pytest.mark.skip - -import logging -from string import Template -import time - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -from pyhdfs import HdfsClient - -SHARDS = 2 -FILES_OVERHEAD_PER_TABLE = 1 # format_version.txt -FILES_OVERHEAD_PER_PART_COMPACT = 7 - - -def wait_for_hdfs_objects(cluster, fp, expected, num_tries=30): - fs = HdfsClient(hosts=cluster.hdfs_ip) - while num_tries > 0: - num_hdfs_objects = len(fs.listdir(fp)) - if num_hdfs_objects == expected: - break - num_tries -= 1 - time.sleep(1) - assert len(fs.listdir(fp)) == expected - - -@pytest.fixture(scope="module") -def cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "node1", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node1"}, - with_zookeeper=True, - with_hdfs=True, - ) - cluster.add_instance( - "node2", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node2"}, - with_zookeeper=True, - with_hdfs=True, - ) - logging.info("Starting cluster...") - cluster.start() - if cluster.instances["node1"].is_debug_build(): - # https://github.com/ClickHouse/ClickHouse/issues/27814 - pytest.skip( - "libhdfs3 calls rand function which does not pass harmful check in debug build" - ) - logging.info("Cluster started") - - fs = HdfsClient(hosts=cluster.hdfs_ip) - fs.mkdirs("/clickhouse1") - fs.mkdirs("/clickhouse2") - logging.info("Created HDFS directory") - - yield cluster - finally: - cluster.shutdown() - - -def test_hdfs_zero_copy_replication_insert(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE hdfs_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hdfs_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='hdfs_only' - """ - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", SHARDS * FILES_OVERHEAD_PER_TABLE - ) - - node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") - node2.query("SYSTEM SYNC REPLICA hdfs_test", timeout=30) - assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert ( - node1.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node2.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - wait_for_hdfs_objects( - cluster, - "/clickhouse1", - SHARDS * FILES_OVERHEAD_PER_TABLE + FILES_OVERHEAD_PER_PART_COMPACT, - ) - finally: - node1.query("DROP TABLE IF EXISTS hdfs_test SYNC") - node2.query("DROP TABLE IF EXISTS hdfs_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), ("tiered", 0), ("tiered_copy", FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_single_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - try: - node1.query( - Template( - """ - CREATE TABLE single_node_move_test (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/single_node_move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy',temporary_directories_lifetime=1 - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO single_node_move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='single_node_move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'main'" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS single_node_move_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), ("tiered", 0), ("tiered_copy", SHARDS * FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - node2.query("SYSTEM SYNC REPLICA move_test", timeout=30) - - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node2.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - finally: - node1.query("DROP TABLE IF EXISTS move_test SYNC") - node2.query("DROP TABLE IF EXISTS move_test SYNC") - - -@pytest.mark.parametrize(("storage_policy"), ["hybrid", "tiered", "tiered_copy"]) -def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE ttl_move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_move_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY TO VOLUME 'external' - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_move_test", "2") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_move_test", "2") - assert ( - node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - - -def test_hdfs_zero_copy_with_ttl_delete(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_delete_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY - SETTINGS storage_policy='tiered' - """ - ) - - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_delete_test", "1") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_delete_test", "1") - - assert ( - node1.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") diff --git a/tests/integration/test_replicated_user_defined_functions/test.py b/tests/integration/test_replicated_user_defined_functions/test.py index c0990819bf4..f54be21c4c0 100644 --- a/tests/integration/test_replicated_user_defined_functions/test.py +++ b/tests/integration/test_replicated_user_defined_functions/test.py @@ -1,10 +1,12 @@ import inspect +from contextlib import nullcontext as does_not_raise import pytest import time import os.path from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException from helpers.test_tools import assert_eq_with_retry, TSV SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -83,6 +85,33 @@ def test_create_and_drop(): node1.query("DROP FUNCTION f1") +@pytest.mark.parametrize( + "ignore, expected_raise", + [("true", does_not_raise()), ("false", pytest.raises(QueryRuntimeException))], +) +def test_create_and_drop_udf_on_cluster(ignore, expected_raise): + node1.replace_config( + "/etc/clickhouse-server/users.d/users.xml", + inspect.cleandoc( + f""" + + + + {ignore} + + + + """ + ), + ) + node1.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node1.query("CREATE FUNCTION f1 ON CLUSTER default AS (x, y) -> x + y") + assert node1.query("SELECT f1(12, 3)") == "15\n" + node1.query("DROP FUNCTION f1 ON CLUSTER default") + + def test_create_and_replace(): node1.query("CREATE FUNCTION f1 AS (x, y) -> x + y") assert node1.query("SELECT f1(12, 3)") == "15\n" diff --git a/tests/integration/test_replicated_users/test.py b/tests/integration/test_replicated_users/test.py index a7dbaf6ed30..489724ed4fb 100644 --- a/tests/integration/test_replicated_users/test.py +++ b/tests/integration/test_replicated_users/test.py @@ -1,3 +1,4 @@ +import inspect import pytest import time @@ -82,6 +83,37 @@ def test_create_replicated_on_cluster(started_cluster, entity): node1.query(f"DROP {entity.keyword} {entity.name} {entity.options}") +@pytest.mark.parametrize("entity", entities, ids=get_entity_id) +def test_create_replicated_on_cluster_ignore(started_cluster, entity): + node1.replace_config( + "/etc/clickhouse-server/users.d/users.xml", + inspect.cleandoc( + f""" + + + + true + + + + """ + ), + ) + node1.query("SYSTEM RELOAD CONFIG") + + node1.query( + f"CREATE {entity.keyword} {entity.name} ON CLUSTER default {entity.options}" + ) + assert ( + f"cannot insert because {entity.keyword.lower()} `{entity.name}{entity.options}` already exists in replicated" + in node2.query_and_get_error_with_retry( + f"CREATE {entity.keyword} {entity.name} {entity.options}" + ) + ) + + node1.query(f"DROP {entity.keyword} {entity.name} {entity.options}") + + @pytest.mark.parametrize("entity", entities, ids=get_entity_id) def test_create_replicated_if_not_exists_on_cluster(started_cluster, entity): node1.query( diff --git a/tests/integration/test_replicating_constants/test.py b/tests/integration/test_replicating_constants/test.py index 82cc5e757f1..00781e473c7 100644 --- a/tests/integration/test_replicating_constants/test.py +++ b/tests/integration/test_replicating_constants/test.py @@ -11,6 +11,7 @@ node2 = cluster.add_instance( image="yandex/clickhouse-server", tag="19.1.14", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_restore_replica/test.py b/tests/integration/test_restore_replica/test.py index 31c503f6184..3d0baa87c21 100644 --- a/tests/integration/test_restore_replica/test.py +++ b/tests/integration/test_restore_replica/test.py @@ -44,6 +44,18 @@ def fill_table(): check_data(499500, 1000) +# kazoo.delete may throw NotEmptyError on concurrent modifications of the path +def zk_rmr_with_retries(zk, path): + for i in range(1, 10): + try: + zk.delete(path, recursive=True) + return + except Exception as ex: + print(ex) + time.sleep(0.5) + assert False + + @pytest.fixture(scope="module") def start_cluster(): try: @@ -84,7 +96,7 @@ def test_restore_replica_sequential(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -119,7 +131,7 @@ def test_restore_replica_parallel(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -147,12 +159,12 @@ def test_restore_replica_alive_replicas(start_cluster): fill_table() print("Deleting replica2 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica2", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica2") assert zk.exists("/clickhouse/tables/test/replicas/replica2") is None node_1.query_and_get_error("SYSTEM RESTORE REPLICA test") print("Deleting replica1 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica1", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica1") assert zk.exists("/clickhouse/tables/test/replicas/replica1") is None node_1.query("SYSTEM RESTART REPLICA test") diff --git a/tests/integration/test_reverse_dns_query/configs/listen_host.xml b/tests/integration/test_reverse_dns_query/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_reverse_dns_query/configs/listen_host.xml +++ b/tests/integration/test_reverse_dns_query/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - +
diff --git a/tests/integration/test_reverse_dns_query/coredns_config/Corefile b/tests/integration/test_reverse_dns_query/coredns_config/Corefile index 84d297f7cdf..3edf37dafa5 100644 --- a/tests/integration/test_reverse_dns_query/coredns_config/Corefile +++ b/tests/integration/test_reverse_dns_query/coredns_config/Corefile @@ -1,4 +1,8 @@ . { + hosts /example.com { + reload "20ms" + fallthrough + } forward . 127.0.0.11 log } diff --git a/tests/integration/test_reverse_dns_query/coredns_config/example.com b/tests/integration/test_reverse_dns_query/coredns_config/example.com new file mode 100644 index 00000000000..6c6e4cbee2e --- /dev/null +++ b/tests/integration/test_reverse_dns_query/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_reverse_dns_query/test.py b/tests/integration/test_reverse_dns_query/test.py index c5d4304a5d2..00c3956f74f 100644 --- a/tests/integration/test_reverse_dns_query/test.py +++ b/tests/integration/test_reverse_dns_query/test.py @@ -1,4 +1,5 @@ import pytest +import socket from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check from time import sleep import os @@ -30,6 +31,28 @@ def started_cluster(): cluster.shutdown() +def check_ptr_record(ip, hostname): + try: + host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) + if hostname.lower() == host.lower(): + return True + except socket.herror: + pass + return False + + +def setup_dns_server(ip): + domains_string = "test.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + # DNS server takes time to reload the configuration. + for try_num in range(10): + if all(check_ptr_record(ip, host) for host in domains_string.split()): + break + sleep(1) + + def setup_ch_server(dns_server_ip): ch_server.exec_in_container( (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) @@ -42,9 +65,10 @@ def setup_ch_server(dns_server_ip): def test_reverse_dns_query(started_cluster): dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - + random_ipv6 = "4ae8:fa0f:ee1d:68c5:0b76:1b79:7ae6:1549" # https://commentpicker.com/ip-address-generator.php + setup_dns_server(random_ipv6) setup_ch_server(dns_server_ip) for _ in range(0, 200): - response = ch_server.query("select reverseDNSQuery('2001:4860:4860::8888')") - assert response == "['dns.google']\n" + response = ch_server.query(f"select reverseDNSQuery('{random_ipv6}')") + assert response == "['test.example.com']\n" diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9a352f43eab..0e1bbea9cea 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -287,7 +287,7 @@ def test_introspection(): assert instance.query( "SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name" - ) == TSV([["R1", "local directory"], ["R2", "local directory"]]) + ) == TSV([["R1", "local_directory"], ["R2", "local_directory"]]) assert instance.query( "SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, database, table, column, is_partial_revoke, grant_option" @@ -301,7 +301,7 @@ def test_introspection(): ) assert instance.query( - "SELECT * from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" + "SELECT user_name, role_name, granted_role_name, granted_role_is_default, with_admin_option from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" ) == TSV([["A", "\\N", "R1", 1, 0], ["B", "\\N", "R2", 1, 1]]) assert instance.query( diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 1933823f5d2..d88a8f2a243 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -420,7 +420,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -433,7 +433,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -446,7 +446,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -459,7 +459,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -636,7 +636,9 @@ def test_grant_create_row_policy(): assert node.query("SHOW POLICIES") == "" node.query("CREATE USER X") - expected_error = "necessary to have grant CREATE ROW POLICY ON mydb.filtered_table1" + expected_error = ( + "necessary to have the grant CREATE ROW POLICY ON mydb.filtered_table1" + ) assert expected_error in node.query_and_get_error( "CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a + + + 2 + + + 496 + + diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml new file mode 100644 index 00000000000..2613b4bbeee --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_mark_cache.xml @@ -0,0 +1,5 @@ + + + 248 + + diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml new file mode 100644 index 00000000000..6f2de0fa8f5 --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_runtime_configurable_cache_size/test.py b/tests/integration/test_runtime_configurable_cache_size/test.py new file mode 100644 index 00000000000..6119ff1ebea --- /dev/null +++ b/tests/integration/test_runtime_configurable_cache_size/test.py @@ -0,0 +1,144 @@ +import os +import pytest +import shutil +import time +from helpers.cluster import ClickHouseCluster + +# Tests that sizes of in-memory caches (mark / uncompressed / index mark / index uncompressed / mmapped file / query cache) can be changed +# at runtime (issue #51085). This file tests only the mark cache (which uses the SLRU cache policy) and the query cache (which uses the TTL +# cache policy). As such, both tests are representative for the other caches. + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/default.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") + + +# temporarily disabled due to https://github.com/ClickHouse/ClickHouse/pull/51446#issuecomment-1687066351 +# def test_mark_cache_size_is_runtime_configurable(start_cluster): +# # the initial config specifies the mark cache size as 496 bytes, just enough to hold two marks +# node.query("SYSTEM DROP MARK CACHE") +# +# node.query("CREATE TABLE test1 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test1 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test1 WHERE val = 'def'") # cache 1st mark +# +# node.query("CREATE TABLE test2 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test2 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test2 WHERE val = 'def'") # cache 2nd mark +# +# # Result checking is based on asynchronous metrics. These are calculated by default every 1.0 sec, and this is also the +# # smallest possible value. Found no statement to force-recalculate them, therefore waaaaait... +# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "2\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "496\n" +# +# # switch to a config with a mark cache size of 248 bytes +# node.copy_file_to_container( +# os.path.join(CONFIG_DIR, "smaller_mark_cache.xml"), +# "/etc/clickhouse-server/config.d/default.xml", +# ) +# +# node.query("SYSTEM RELOAD CONFIG") +# +# # check that eviction worked as expected +# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "1\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "248\n" +# +# # check that the new mark cache maximum size is respected when more marks are cached +# node.query("CREATE TABLE test3 (val String) ENGINE=MergeTree ORDER BY val") +# node.query("INSERT INTO test3 VALUES ('abc') ('def') ('ghi')") +# node.query("SELECT * FROM test3 WHERE val = 'def'") +# time.sleep(2.0) +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheFiles'" +# ) +# assert res == "1\n" +# res = node.query( +# "SELECT value FROM system.asynchronous_metrics WHERE metric LIKE 'MarkCacheBytes'" +# ) +# assert res == "248\n" +# +# # restore the original config +# node.copy_file_to_container( +# os.path.join(CONFIG_DIR, "default.xml"), +# "/etc/clickhouse-server/config.d/default.xml", +# ) + + +def test_query_cache_size_is_runtime_configurable(start_cluster): + # the inital config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries + node.query("SYSTEM DROP QUERY CACHE") + node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert res == "2\n" + + # switch to a config with a maximum query cache size of 1 + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "smaller_query_cache.xml"), + "/etc/clickhouse-server/config.d/default.xml", + ) + + node.query("SYSTEM RELOAD CONFIG") + + # check that eviction worked as expected + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert ( + res == "2\n" + ) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. + # Not a real issue, can be changed later, at least there's a test now. + + # Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL + # (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now. + + # check that the new query cache maximum size is respected when more queries run + node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + time.sleep(2.0) + res = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'" + ) + assert res == "1\n" + + # restore the original config + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "default.xml"), + "/etc/clickhouse-server/config.d/default.xml", + ) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 2a4e0eece08..0ca81a27802 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -153,96 +153,6 @@ def test_s3_zero_copy_replication(started_cluster, policy): node2.query("DROP TABLE IF EXISTS s3_test SYNC") -@pytest.mark.skip(reason="Test is flaky (and never was stable)") -def test_s3_zero_copy_on_hybrid_storage(started_cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - - node1.query( - """ - CREATE TABLE hybrid_test ON CLUSTER test_cluster (id UInt32, value String) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/hybrid_test', '{}') - ORDER BY id - SETTINGS storage_policy='hybrid',temporary_directories_lifetime=1 - """.format( - "{replica}" - ) - ) - - node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')") - node2.query("SYSTEM SYNC REPLICA hybrid_test", timeout=30) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - node1.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - # Total objects in S3 - s3_objects = get_large_objects_count(cluster, size=0) - - node2.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - - # Check that after moving partition on node2 no new obects on s3 - wait_for_large_objects_count(cluster, s3_objects, size=0) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node1.query("DROP TABLE IF EXISTS hybrid_test SYNC") - node2.query("DROP TABLE IF EXISTS hybrid_test SYNC") - - def insert_data_time(node, table, number_of_mb, time, start=0): values = ",".join( f"({x},{time})" diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py deleted file mode 100644 index 5fbe426074f..00000000000 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -import time - -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/45887 - -pytestmark = pytest.mark.skip - -from helpers.cluster import ClickHouseCluster - - -single_node_cluster = ClickHouseCluster(__file__) -small_node = single_node_cluster.add_instance( - "small_node", main_configs=["configs/s3.xml"], with_minio=True -) - - -@pytest.fixture(scope="module") -def started_single_node_cluster(): - try: - single_node_cluster.start() - - yield single_node_cluster - finally: - single_node_cluster.shutdown() - - -def test_move_and_s3_memory_usage(started_single_node_cluster): - if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): - pytest.skip("Disabled for debug and sanitizers. Too slow.") - - small_node.query( - "CREATE TABLE s3_test_with_ttl (x UInt32, a String codec(NONE), b String codec(NONE), c String codec(NONE), d String codec(NONE), e String codec(NONE)) engine = MergeTree order by x partition by x SETTINGS storage_policy='s3_and_default'" - ) - - for _ in range(10): - small_node.query( - "insert into s3_test_with_ttl select 0, repeat('a', 100), repeat('b', 100), repeat('c', 100), repeat('d', 100), repeat('e', 100) from zeros(400000) settings max_block_size = 8192, max_insert_block_size=10000000, min_insert_block_size_rows=10000000" - ) - - # After this, we should have 5 columns per 10 * 100 * 400000 ~ 400 MB; total ~2G data in partition - small_node.query( - "optimize table s3_test_with_ttl final", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - - small_node.query("system flush logs") - # Will take memory usage from metric_log. - # It is easier then specifying total memory limit (insert queries can hit this limit). - small_node.query("truncate table system.metric_log") - - small_node.query( - "alter table s3_test_with_ttl move partition 0 to volume 'external'", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - small_node.query("system flush logs") - max_usage = small_node.query( - """ - select max(m.val - am.val * 4096) from - (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join - (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where metric='jemalloc.arenas.all.pdirty' group by time) as am using time;""" - ) - # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. - # Also actual value can be bigger because of memory drift. - # Increase it a little bit if test fails. - assert int(max_usage) < 3e9 - res = small_node.query( - "select * from system.errors where last_error_message like '%Memory limit%' limit 1", - settings={ - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - assert res == "" diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py index ac938a9694a..3cd6b90109c 100644 --- a/tests/integration/test_select_access_rights/test_from_system_tables.py +++ b/tests/integration/test_select_access_rights/test_from_system_tables.py @@ -44,14 +44,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="another" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="another" @@ -72,14 +72,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="sqluser" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="sqluser" @@ -138,7 +138,7 @@ def test_information_schema(): ) expected_error = ( - "necessary to have grant SELECT(table_name) ON information_schema.tables" + "necessary to have the grant SELECT(table_name) ON information_schema.tables" ) assert expected_error in node.query_and_get_error( "SELECT count() FROM information_schema.tables WHERE table_name='table1'", diff --git a/tests/integration/test_select_access_rights/test_main.py b/tests/integration/test_select_access_rights/test_main.py index 76940cdadb4..eedecc2d30c 100644 --- a/tests/integration/test_select_access_rights/test_main.py +++ b/tests/integration/test_select_access_rights/test_main.py @@ -30,7 +30,7 @@ def test_select_single_column(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -39,7 +39,7 @@ def test_select_single_column(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -51,7 +51,7 @@ def test_select_single_column_with_table_grant(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -60,7 +60,7 @@ def test_select_single_column_with_table_grant(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -72,19 +72,19 @@ def test_select_all_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -99,7 +99,7 @@ def test_select_all_columns_with_table_grant(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -114,7 +114,7 @@ def test_alias(): select_query = "SELECT x, y, x + y AS s FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -129,7 +129,7 @@ def test_alias_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -138,7 +138,7 @@ def test_alias_columns(): select_query = "SELECT s FROM table1" assert ( - "it's necessary to have grant SELECT(s) ON default.table1" + "it's necessary to have the grant SELECT(s) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -156,7 +156,7 @@ def test_materialized_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -165,7 +165,7 @@ def test_materialized_columns(): select_query = "SELECT p FROM table1" assert ( - "it's necessary to have grant SELECT(p) ON default.table1" + "it's necessary to have the grant SELECT(p) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -186,13 +186,13 @@ def test_select_join(): select_query = "SELECT * FROM table1 JOIN table2 USING(d)" assert ( - "it's necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d, x, y) ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -201,7 +201,7 @@ def test_select_join(): instance.query("REVOKE SELECT ON default.table2 FROM A") assert ( - "it's necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -216,13 +216,13 @@ def test_select_union(): select_query = "SELECT * FROM table1 UNION ALL SELECT * FROM table2" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a, b) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table2" + "it's necessary to have the grant SELECT(a, b) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -231,7 +231,7 @@ def test_select_union(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -243,7 +243,7 @@ def test_select_count(): select_query = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -252,7 +252,7 @@ def test_select_count(): instance.query("REVOKE SELECT(x) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -261,7 +261,7 @@ def test_select_count(): instance.query("REVOKE SELECT(y) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -279,7 +279,7 @@ def test_select_where(): select_query = "SELECT a FROM table1 WHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -288,7 +288,7 @@ def test_select_where(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -306,7 +306,7 @@ def test_select_prewhere(): select_query = "SELECT a FROM table1 PREWHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -315,7 +315,7 @@ def test_select_prewhere(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -334,11 +334,11 @@ def test_select_with_row_policy(): select_query = "SELECT a FROM table1" select_query2 = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) @@ -348,10 +348,10 @@ def test_select_with_row_policy(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) diff --git a/tests/integration/test_session_log/.gitignore b/tests/integration/test_session_log/.gitignore new file mode 100644 index 00000000000..edf565ec632 --- /dev/null +++ b/tests/integration/test_session_log/.gitignore @@ -0,0 +1 @@ +_gen diff --git a/tests/integration/test_session_log/__init__.py b/tests/integration/test_session_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_session_log/configs/log.xml b/tests/integration/test_session_log/configs/log.xml new file mode 100644 index 00000000000..7a079b81e69 --- /dev/null +++ b/tests/integration/test_session_log/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/ports.xml b/tests/integration/test_session_log/configs/ports.xml new file mode 100644 index 00000000000..fbaefc16b3a --- /dev/null +++ b/tests/integration/test_session_log/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/session_log.xml b/tests/integration/test_session_log/configs/session_log.xml new file mode 100644 index 00000000000..a0e4e3e2216 --- /dev/null +++ b/tests/integration/test_session_log/configs/session_log.xml @@ -0,0 +1,9 @@ + + + system + session_log
+ + toYYYYMM(event_date) + 7500 +
+
diff --git a/tests/integration/test_session_log/configs/users.xml b/tests/integration/test_session_log/configs/users.xml new file mode 100644 index 00000000000..0416dfadc8a --- /dev/null +++ b/tests/integration/test_session_log/configs/users.xml @@ -0,0 +1,23 @@ + + + + 0 + + + + + + + pass + + + pass + + + pass + + + pass + + + \ No newline at end of file diff --git a/tests/integration/test_session_log/protos/clickhouse_grpc.proto b/tests/integration/test_session_log/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_session_log/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py new file mode 100644 index 00000000000..0c350e6c2c9 --- /dev/null +++ b/tests/integration/test_session_log/test.py @@ -0,0 +1,295 @@ +import os +import grpc +import pymysql.connections +import pytest +import random +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check + +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 +SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/session_log.xml", + ], + user_configs=["configs/users.xml"], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + env_variables={ + "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") + }, + with_postgres=True, +) + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +session_id_counter = 0 + + +def next_session_id(): + global session_id_counter + session_id = session_id_counter + session_id_counter += 1 + return str(session_id) + + +def grpc_query(query, user_, pass_, raise_exception): + try: + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query, + session_id=next_session_id(), + user_name=user_, + password=pass_, + ) + channel = grpc_create_insecure_channel() + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + + return result.output.decode(DEFAULT_ENCODING) + except Exception: + assert raise_exception + + +def postgres_query(query, user_, pass_, raise_exception): + try: + connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}" + cluster.exec_in_container( + cluster.postgres_id, + [ + "/usr/bin/psql", + connection_string, + "--no-align", + "--field-separator=' '", + "-c", + query, + ], + shell=True, + ) + except Exception: + assert raise_exception + + +def mysql_query(query, user_, pass_, raise_exception): + try: + client = pymysql.connections.Connection( + host=instance.ip_address, + user=user_, + password=pass_, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + if raise_exception: + with pytest.raises(Exception): + cursor.execute(query) + else: + cursor.execute(query) + cursor.fetchall() + except Exception: + assert raise_exception + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + # Wait for the PostgreSQL handler to start. + # Cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + instance.wait_for_log_line("PostgreSQL compatibility protocol") + yield cluster + finally: + cluster.shutdown() + + +def test_grpc_session(started_cluster): + grpc_query("SELECT 1", "grpc_user", "pass", False) + grpc_query("SELECT 2", "grpc_user", "wrong_pass", True) + grpc_query("SELECT 3", "wrong_grpc_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "grpc_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'" + ) + assert logout_records == "grpc_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "grpc_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_mysql_session(started_cluster): + mysql_query("SELECT 1", "mysql_user", "pass", False) + mysql_query("SELECT 2", "mysql_user", "wrong_pass", True) + mysql_query("SELECT 3", "wrong_mysql_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "mysql_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'" + ) + assert logout_records == "mysql_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "mysql_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_postgres_session(started_cluster): + postgres_query("SELECT 1", "postgres_user", "pass", False) + postgres_query("SELECT 2", "postgres_user", "wrong_pass", True) + postgres_query("SELECT 3", "wrong_postgres_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "postgres_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'" + ) + assert logout_records == "postgres_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "postgres_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_parallel_sessions(started_cluster): + thread_list = [] + for _ in range(10): + # Sleep time does not significantly matter here, + # test should pass even without sleeping. + for function in [postgres_query, grpc_query, mysql_query]: + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "pass", + False, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "wrong_pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "wrong_parallel_user", + "pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + + for thread in thread_list: + thread.join() + + instance.query("SYSTEM FLUSH LOGS") + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'" + ) + assert port_0_sessions == "90\n" + + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0" + ) + assert port_0_sessions == "0\n" + + address_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')" + ) + assert address_0_sessions == "0\n" + + grpc_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'" + ) + assert grpc_sessions == "30\n" + + mysql_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'" + ) + assert mysql_sessions == "30\n" + + postgres_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'" + ) + assert postgres_sessions == "30\n" + + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "30\n" + + logout_failure_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'" + ) + assert logout_failure_sessions == "30\n" diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 13acc3f1042..5e40b534cee 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -88,7 +88,7 @@ def test_smoke(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "['robin']", "[]"] + ["xyz", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -120,7 +120,7 @@ def test_smoke(): instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(user_name="robin") == [] @@ -201,7 +201,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -276,7 +276,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "['worker']", "[]"] + ["xyz", "local_directory", 2, 0, "['worker']", "[]"] ] instance.query("ALTER SETTINGS PROFILE xyz TO NONE") @@ -293,7 +293,7 @@ def test_settings_from_granted_role(): ) instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] @@ -323,7 +323,7 @@ def test_inheritance(): ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -340,7 +340,7 @@ def test_inheritance(): ] ] assert system_settings_profile("alpha") == [ - ["alpha", "local directory", 1, 0, "['robin']", "[]"] + ["alpha", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="alpha") == [ ["alpha", "\\N", "\\N", 0, "\\N", "\\N", "\\N", "\\N", "\\N", "xyz"] @@ -589,10 +589,10 @@ def test_function_current_profiles(): def test_allow_ddl(): - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "GRANT CREATE ON tbl TO robin", user="robin" ) assert "DDL queries are prohibited" in instance.query_and_get_error( @@ -615,10 +615,10 @@ def test_allow_introspection(): assert "Introspection functions are disabled" in instance.query_and_get_error( "SELECT demangle('a')" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin", settings={"allow_introspection_functions": 1}, @@ -659,7 +659,7 @@ def test_allow_introspection(): "REVOKE demangle ON *.* FROM robin", settings={"allow_introspection_functions": 1}, ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index d1ae39ca378..d31457488c1 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -160,6 +160,10 @@ def get_ssl_context(cert_name): ) context.verify_mode = ssl.CERT_REQUIRED context.check_hostname = True + # Python 3.10 has removed many ciphers from the cipher suite. + # Hence based on https://github.com/urllib3/urllib3/issues/3100#issuecomment-1671106236 + # we are expanding the list of cipher suites. + context.set_ciphers("DEFAULT") return context diff --git a/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml new file mode 100644 index 00000000000..ffa4673c9ee --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml @@ -0,0 +1,39 @@ + + + + + + node_0 + 9000 + + + node_1 + 9000 + + + node_2 + 9000 + + + + + + + + node_0 + 9000 + + + + + node_1 + 19000 + + + + + + + simple_cluster + + \ No newline at end of file diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 21f57a67495..37cdc9b609b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -35,14 +35,19 @@ def cluster(): cluster.shutdown() -def azure_query(node, query, try_num=10, settings={}): +def azure_query(node, query, expect_error="false", try_num=10, settings={}): for i in range(try_num): try: - return node.query(query, settings=settings) + if expect_error == "true": + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", ] retry = False for error in retriable_errors: @@ -611,3 +616,99 @@ def test_filter_using_file(cluster): query = f"select count(*) from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_partition_tf_*.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') WHERE _file='test_partition_tf_3.csv'" assert azure_query(node, query) == "1\n" + + +def test_read_subcolumns(cluster): + node = cluster.instances["node"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\tcont/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + +def test_read_from_not_existing_container(cluster): + node = cluster.instances["node"] + query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" + expected_err_msg = "container does not exist" + assert expected_err_msg in azure_query(node, query, expect_error="true") + + +def test_function_signatures(cluster): + node = cluster.instances["node"] + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;" + storage_account_url = "http://azurite1:10000/devstoreaccount1" + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + ) + + # " - connection_string, container_name, blobpath\n" + query_1 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv')" + assert azure_query(node, query_1) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, structure \n" + query_2 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'column1 UInt32')" + assert azure_query(node, query_2) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format \n" + query_3 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV')" + assert azure_query(node, query_3) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression \n" + query_4 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto')" + assert azure_query(node, query_4) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression, structure \n" + query_5 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_5) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key\n" + query_6 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}')" + assert azure_query(node, query_6) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + query_7 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'column1 UInt32')" + assert azure_query(node, query_7) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + query_8 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV')" + assert azure_query(node, query_8) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + query_9 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto')" + assert azure_query(node, query_9) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n" + query_10 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_10) == "1\n2\n3\n" diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py new file mode 100644 index 00000000000..454f1cdf294 --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io +import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.test_tools import TSV +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry +from test_storage_azure_blob_storage.test import azure_query + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_0", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_1", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_2", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def get_azure_file_content(filename): + container_name = "cont" + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + container_client = blob_service_client.get_container_client(container_name) + blob_client = container_client.get_blob_client(filename) + download_stream = blob_client.download_blob() + return download_stream.readall().decode("utf-8") + + +def test_select_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key UInt64, data String') VALUES (1, 'a'), (2, 'b')", + ) + print(get_azure_file_content("test_cluster_select_all.csv")) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_count(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key UInt64') VALUES (1), (2)", + ) + print(get_azure_file_content("test_cluster_count.csv")) + + pure_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_union_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " + "'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + ) + + pure_azure = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + azure_distributed = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorageCluster( + 'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorageCluster( + 'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + + assert TSV(pure_azure) == TSV(azure_distributed) + + +def test_skip_unavailable_shards(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + SETTINGS skip_unavailable_shards = 1 + """, + ) + + assert result == "2\n" + + +def test_unset_skip_unavailable_shards(cluster): + # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + assert result == "2\n" + + +def test_cluster_with_named_collection(cluster): + node = cluster.instances["node_0"] + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + azure_cluster = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 'simple_cluster', azure_conf2, container='cont', blob_path='test_cluster_with_named_collection.csv') + """, + ) + + assert TSV(pure_azure) == TSV(azure_cluster) + + +def test_partition_parallel_readig_withcluster(cluster): + node = cluster.instances["node_0"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + partition_by = "column3" + values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)" + filename = "test_tf_{_partition_id}.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + ) + + assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv") + assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv") + assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv") + + azure_cluster = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', + azure_conf2, container='cont', blob_path='test_tf_*.csv', format='CSV', compression='auto', structure='column1 UInt32, column2 UInt32, column3 UInt32') + """, + ) + + assert azure_cluster == "3\n" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 8ff88791a3a..0824a7b5f0f 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -892,6 +892,54 @@ def test_skip_empty_files(started_cluster): assert int(res) == 0 +def test_read_subcolumns(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res + == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/configs/kafka.xml b/tests/integration/test_storage_kafka/configs/kafka.xml index 062c98a2ac7..c6075aff715 100644 --- a/tests/integration/test_storage_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kafka/configs/kafka.xml @@ -10,6 +10,10 @@ --> cgrp,consumer,topic,protocol + + + 600 + consumer_hang diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index f196837751b..36815badd53 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -444,6 +444,34 @@ def test_kafka_settings_new_syntax(kafka_cluster): assert members[0]["client_id"] == "instance test 1234" +def test_kafka_settings_predefined_macros(kafka_cluster): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = '{kafka_broker}:19092', + kafka_topic_list = '{database}_{table}_topic', + kafka_group_name = '{database}_{table}_group', + kafka_format = '{kafka_format_json_each_row}', + kafka_row_delimiter = '\\n', + kafka_commit_on_select = 1, + kafka_client_id = '{database}_{table} test 1234', + kafka_skip_broken_messages = 1; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, "test_kafka_topic", messages) + + result = instance.query("SELECT * FROM test.kafka", ignore_error=True) + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, "test_kafka_group") + assert members[0]["client_id"] == "test_kafka test 1234" + + def test_kafka_json_as_string(kafka_cluster): kafka_produce( kafka_cluster, @@ -1158,6 +1186,7 @@ def test_kafka_consumer_hang2(kafka_cluster): instance.query( """ DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka @@ -4517,6 +4546,294 @@ def test_block_based_formats_2(kafka_cluster): kafka_delete_topic(admin_client, format_name) +def test_system_kafka_consumers(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "system_kafka_cons" + kafka_create_topic(admin_client, topic) + + # Check that format_csv_delimiter parameter works now - as part of all available format settings. + kafka_produce( + kafka_cluster, + topic, + ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"], + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (a UInt64, b String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'CSV', + kafka_row_delimiter = '\\n', + format_csv_delimiter = '|'; + """ + ) + + result = instance.query("SELECT * FROM test.kafka ORDER BY a;") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table='kafka' format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [4] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 4 +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + kafka_delete_topic(admin_client, topic) + + +def test_system_kafka_consumers_rebalance(kafka_cluster, max_retries=15): + # based on test_kafka_consumer_hang2 + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + topic = "system_kafka_cons2" + kafka_create_topic(admin_client, topic, num_partitions=2) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + """ + ) + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) + time.sleep(3) + + # first consumer subscribe the topic, try to poll some data, and go to rest + instance.query("SELECT * FROM test.kafka") + + # second consumer do the same leading to rebalance in the first + # consumer, try to poll some data + instance.query("SELECT * FROM test.kafka2") + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 10, "value": 10}), partition=1) + time.sleep(3) + + instance.query("SELECT * FROM test.kafka") + instance.query("SELECT * FROM test.kafka2") + instance.query("SELECT * FROM test.kafka") + instance.query("SELECT * FROM test.kafka2") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table IN ('kafka', 'kafka2') format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [0] +assignments.current_offset: [2] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 4 +last_commit_time_: now +num_commits: 2 +last_rebalance_time_: now +num_rebalance_revocations: 1 +num_rebalance_assignments: 2 +is_currently_used: 0 + +Row 2: +────── +database: test +table: kafka2 +length(consumer_id): 68 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [1] +assignments.current_offset: [2] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 1 +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + + kafka_delete_topic(admin_client, topic) + + +def test_system_kafka_consumers_rebalance_mv(kafka_cluster, max_retries=15): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + topic = "system_kafka_cons_mv" + kafka_create_topic(admin_client, topic, num_partitions=2) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; + DROP TABLE IF EXISTS test.kafka_persistent; + DROP TABLE IF EXISTS test.kafka_persistent2; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka_persistent (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.kafka_persistent2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.kafka_persistent AS + SELECT key, value + FROM test.kafka; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv2 TO test.kafka_persistent2 AS + SELECT key, value + FROM test.kafka2; + """ + ) + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) + time.sleep(3) + + retries = 0 + result_rdkafka_stat = "" + while True: + result_rdkafka_stat = instance.query( + """ + SELECT table, JSONExtractString(rdkafka_stat, 'type') + FROM system.kafka_consumers WHERE database='test' and table = 'kafka' format Vertical; + """ + ) + if result_rdkafka_stat.find("consumer") or retries > max_retries: + break + retries += 1 + time.sleep(1) + + assert ( + result_rdkafka_stat + == """Row 1: +────── +table: kafka +JSONExtractString(rdkafka_stat, 'type'): consumer +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + instance.query("DROP TABLE test.kafka_persistent") + instance.query("DROP TABLE test.kafka_persistent2") + instance.query("DROP TABLE test.persistent_kafka_mv") + instance.query("DROP TABLE test.persistent_kafka_mv2") + + kafka_delete_topic(admin_client, topic) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto b/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto deleted file mode 100755 index 090ed917cdd..00000000000 --- a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto +++ /dev/null @@ -1,6 +0,0 @@ -syntax = "proto3"; - - message ProtoKeyValue { - uint64 key = 1; - string value = 2; - } diff --git a/tests/integration/test_storage_nats/configs/macros.xml b/tests/integration/test_storage_nats/configs/macros.xml deleted file mode 100644 index 4aa547e049e..00000000000 --- a/tests/integration/test_storage_nats/configs/macros.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - nats1:4444 - macro - JSONEachRow - - diff --git a/tests/integration/test_storage_nats/configs/named_collection.xml b/tests/integration/test_storage_nats/configs/named_collection.xml deleted file mode 100644 index 15817f321f0..00000000000 --- a/tests/integration/test_storage_nats/configs/named_collection.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - nats1:4444 - named - JSONEachRow - 111 - 12 - click - house - - - diff --git a/tests/integration/test_storage_nats/configs/nats.xml b/tests/integration/test_storage_nats/configs/nats.xml deleted file mode 100644 index 0a8be9122ad..00000000000 --- a/tests/integration/test_storage_nats/configs/nats.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - click - house - - diff --git a/tests/integration/test_storage_nats/nats_certs.sh b/tests/integration/test_storage_nats/nats_certs.sh deleted file mode 100755 index 689221c39e4..00000000000 --- a/tests/integration/test_storage_nats/nats_certs.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -euxo pipefail - -mkdir -p "${NATS_CERT_DIR}/ca" -mkdir -p "${NATS_CERT_DIR}/nats" -openssl req -newkey rsa:4096 -x509 -days 365 -nodes -batch -keyout "${NATS_CERT_DIR}/ca/ca-key.pem" -out "${NATS_CERT_DIR}/ca/ca-cert.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" -openssl req -newkey rsa:4096 -nodes -batch -keyout "${NATS_CERT_DIR}/nats/server-key.pem" -out "${NATS_CERT_DIR}/nats/server-req.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" -openssl x509 -req -days 365 -in "${NATS_CERT_DIR}/nats/server-req.pem" -CA "${NATS_CERT_DIR}/ca/ca-cert.pem" -CAkey "${NATS_CERT_DIR}/ca/ca-key.pem" -CAcreateserial -out "${NATS_CERT_DIR}/nats/server-cert.pem" -extfile <( -cat <<-EOF -subjectAltName = DNS:localhost, DNS:nats1 -EOF -) -rm -f "${NATS_CERT_DIR}/nats/server-req.pem" diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py deleted file mode 100644 index e9e5cb72363..00000000000 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: clickhouse_path/format_schemas/nats.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n)clickhouse_path/format_schemas/nats.proto"+\n\rProtoKeyValue\x12\x0b\n\x03key\x18\x01 \x01(\x04\x12\r\n\x05value\x18\x02 \x01(\tb\x06proto3' -) - - -_PROTOKEYVALUE = DESCRIPTOR.message_types_by_name["ProtoKeyValue"] -ProtoKeyValue = _reflection.GeneratedProtocolMessageType( - "ProtoKeyValue", - (_message.Message,), - { - "DESCRIPTOR": _PROTOKEYVALUE, - "__module__": "clickhouse_path.format_schemas.nats_pb2" - # @@protoc_insertion_point(class_scope:ProtoKeyValue) - }, -) -_sym_db.RegisterMessage(ProtoKeyValue) - -if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - _PROTOKEYVALUE._serialized_start = 45 - _PROTOKEYVALUE._serialized_end = 88 -# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py deleted file mode 100644 index 4d7e4cf813d..00000000000 --- a/tests/integration/test_storage_nats/test.py +++ /dev/null @@ -1,1875 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/39185 - -pytestmark = pytest.mark.skip - -import json -import os.path as p -import random -import subprocess -import threading -import logging -import time -from random import randrange -import math - -import asyncio -from google.protobuf.internal.encoder import _VarintBytes -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, check_nats_is_available, nats_connect_ssl -from helpers.test_tools import TSV - -from . import nats_pb2 - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=[ - "configs/nats.xml", - "configs/macros.xml", - "configs/named_collection.xml", - ], - user_configs=["configs/users.xml"], - with_nats=True, - clickhouse_path_dir="clickhouse_path", -) - - -# Helpers - - -def wait_nats_to_start(nats_port, ssl_ctx=None, timeout=180): - start = time.time() - while time.time() - start < timeout: - try: - if asyncio.run(check_nats_is_available(nats_port, ssl_ctx=ssl_ctx)): - logging.debug("NATS is available") - return - time.sleep(0.5) - except Exception as ex: - logging.debug("Can't connect to NATS " + str(ex)) - time.sleep(0.5) - - -def nats_check_result(result, check=False, ref_file="test_nats_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -def kill_nats(nats_id): - p = subprocess.Popen(("docker", "stop", nats_id), stdout=subprocess.PIPE) - p.communicate() - return p.returncode == 0 - - -def revive_nats(nats_id, nats_port): - p = subprocess.Popen(("docker", "start", nats_id), stdout=subprocess.PIPE) - p.communicate() - wait_nats_to_start(nats_port) - - -# Fixtures - - -@pytest.fixture(scope="module") -def nats_cluster(): - try: - cluster.start() - logging.debug("nats_id is {}".format(instance.cluster.nats_docker_id)) - instance.query("CREATE DATABASE test") - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def nats_setup_teardown(): - print("NATS is available - running test") - yield # run test - instance.query("DROP DATABASE test SYNC") - instance.query("CREATE DATABASE test") - - -# Tests - - -async def nats_produce_messages(cluster_inst, subject, messages=(), bytes=None): - nc = await nats_connect_ssl( - cluster_inst.nats_port, - user="click", - password="house", - ssl_ctx=cluster_inst.nats_ssl_context, - ) - logging.debug("NATS connection status: " + str(nc.is_connected)) - - for message in messages: - await nc.publish(subject, message.encode()) - if bytes is not None: - await nc.publish(subject, bytes) - logging.debug("Finished publising to " + subject) - - await nc.close() - return messages - - -def check_table_is_ready(instance, table_name): - try: - instance.query("SELECT * FROM {}".format(table_name)) - return True - except Exception: - return False - - -def test_nats_select(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'select', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "select", messages)) - - # The order of messages in select * from test.nats is not guaranteed, so sleep to collect everything in one select - time.sleep(1) - - result = "" - while True: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_select_empty(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'empty', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - - assert int(instance.query("SELECT count() FROM test.nats")) == 0 - - -def test_nats_json_without_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'json', - nats_format = 'JSONEachRow'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - time.sleep(1) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_csv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'csv', - nats_format = 'CSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}, {i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "csv", messages)) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_tsv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'tsv', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}\t{i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "tsv", messages)) - - result = "" - for _ in range(60): - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -# - - -def test_nats_macros(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = '{nats_url}', - nats_subjects = '{nats_subjects}', - nats_format = '{nats_format}' - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - message = "" - for i in range(50): - message += json.dumps({"key": i, "value": i}) + "\n" - asyncio.run(nats_produce_messages(nats_cluster, "macro", [message])) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats group by (key, value); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view_with_subquery(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mvsq', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM (SELECT * FROM test.nats); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mvsq", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_many_materialized_views(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view1; - DROP TABLE IF EXISTS test.view2; - DROP TABLE IF EXISTS test.consumer1; - DROP TABLE IF EXISTS test.consumer2; - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mmv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view1 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS - SELECT * FROM test.nats; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mmv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result1 = instance.query("SELECT * FROM test.view1 ORDER BY key") - result2 = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result1) and nats_check_result(result2): - break - - instance.query( - """ - DROP TABLE test.consumer1; - DROP TABLE test.consumer2; - DROP TABLE test.view1; - DROP TABLE test.view2; - """ - ) - - nats_check_result(result1, True) - nats_check_result(result2, True) - - -def test_nats_protobuf(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'pb', - nats_format = 'Protobuf', - nats_schema = 'nats.proto:ProtoKeyValue'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - data = b"" - for i in range(0, 20): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(20, 21): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(21, 50): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_big_message(nats_cluster): - # Create batchs of messages of size ~100Kb - nats_messages = 1000 - batch_messages = 1000 - messages = [ - json.dumps({"key": i, "value": "x" * 100}) * batch_messages - for i in range(nats_messages) - ] - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'big', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run(nats_produce_messages(nats_cluster, "big", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - if int(result) == batch_messages * nats_messages: - break - - assert ( - int(result) == nats_messages * batch_messages - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_mv_combo(nats_cluster): - NUM_MV = 5 - NUM_CONSUMERS = 4 - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'combo', - nats_num_consumers = {}, - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """.format( - NUM_CONSUMERS - ) - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE IF EXISTS test.combo_{0}; - DROP TABLE IF EXISTS test.combo_{0}_mv; - CREATE TABLE test.combo_{0} (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.combo_{0}_mv TO test.combo_{0} AS - SELECT * FROM test.nats; - """.format( - mv_id - ) - ) - - time.sleep(2) - - i = [0] - messages_num = 10000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "combo", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - while True: - result = 0 - for mv_id in range(NUM_MV): - result += int( - instance.query("SELECT count() FROM test.combo_{0}".format(mv_id)) - ) - if int(result) == messages_num * threads_num * NUM_MV: - break - time.sleep(1) - - for thread in threads: - thread.join() - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE test.combo_{0}_mv; - DROP TABLE test.combo_{0}; - """.format( - mv_id - ) - ) - - assert ( - int(result) == messages_num * threads_num * NUM_MV - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_insert(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("insert") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query("INSERT INTO test.nats VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_subjects_insert_wrong(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert1,insert2.>,insert3.*.foo', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - # no subject specified - instance.query_and_get_error("INSERT INTO test.nats VALUES {}".format(values)) - - # can't insert into wildcard subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert2.>' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.*.foo' VALUES {}".format( - values - ) - ) - - # specified subject is not among engine's subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert4' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.foo.baz' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='foo.insert2' VALUES {}".format( - values - ) - ) - - -def test_nats_many_subjects_insert_right(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'right_insert1,right_insert2', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("right_insert1") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='right_insert1' VALUES {}".format( - values - ) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_inserts(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats_many; - DROP TABLE IF EXISTS test.nats_consume; - DROP TABLE IF EXISTS test.view_many; - DROP TABLE IF EXISTS test.consumer_many; - CREATE TABLE test.nats_many (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_many (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 10000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - def insert(): - while True: - try: - instance.query("INSERT INTO test.nats_many VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 10 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - for thread in threads: - thread.join() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_many") - print(result, messages_num * threads_num) - if int(result) >= messages_num * threads_num: - break - time.sleep(1) - - instance.query( - """ - DROP TABLE test.nats_consume; - DROP TABLE test.nats_many; - DROP TABLE test.consumer_many; - DROP TABLE test.view_many; - """ - ) - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. Total count: {}".format( - result - ) - - -def test_nats_overloaded_insert(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view_overload; - DROP TABLE IF EXISTS test.consumer_overload; - DROP TABLE IF EXISTS test.nats_consume; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_num_consumers = 5, - nats_max_block_size = 10000, - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_overload (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_overload (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, - cleanup_thread_preferred_points_per_iteration=0; - CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - - def insert(): - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.nats_overload VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 5 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_overload") - time.sleep(1) - if int(result) >= messages_num * threads_num: - break - - instance.query( - """ - DROP TABLE test.consumer_overload; - DROP TABLE test.view_overload; - DROP TABLE test.nats_consume; - DROP TABLE test.nats_overload; - """ - ) - - for thread in threads: - thread.join() - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. Total count: {}".format( - result - ) - - -def test_nats_virtual_column(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals', - nats_format = 'JSONEachRow'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT value, key, _subject FROM test.nats_virtuals; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals"): - logging.debug("Table test.nats_virtuals is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query( - """ - SELECT key, value, _subject - FROM test.view ORDER BY key - """ - ) - - expected = """\ -0 0 virtuals -1 1 virtuals -2 2 virtuals -3 3 virtuals -4 4 virtuals -5 5 virtuals -6 6 virtuals -7 7 virtuals -8 8 virtuals -9 9 virtuals -""" - - instance.query( - """ - DROP TABLE test.nats_virtuals; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_virtual_column_with_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals_mv (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals_mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64, subject String) ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _subject as subject - FROM test.nats_virtuals_mv; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals_mv"): - logging.debug("Table test.nats_virtuals_mv is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals_mv", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query("SELECT key, value, subject FROM test.view ORDER BY key") - expected = """\ -0 0 virtuals_mv -1 1 virtuals_mv -2 2 virtuals_mv -3 3 virtuals_mv -4 4 virtuals_mv -5 5 virtuals_mv -6 6 virtuals_mv -7 7 virtuals_mv -8 8 virtuals_mv -9 9 virtuals_mv -""" - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - DROP TABLE test.nats_virtuals_mv - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_many_consumers_to_each_queue(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination(key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - num_tables = 4 - for table_id in range(num_tables): - print(("Setting up table {}".format(table_id))) - instance.query( - """ - DROP TABLE IF EXISTS test.many_consumers_{0}; - DROP TABLE IF EXISTS test.many_consumers_{0}_mv; - CREATE TABLE test.many_consumers_{0} (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_consumers', - nats_num_consumers = 2, - nats_queue_group = 'many_consumers', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.many_consumers_{0}_mv TO test.destination AS - SELECT key, value FROM test.many_consumers_{0}; - """.format( - table_id - ) - ) - while not check_table_is_ready( - instance, "test.many_consumers_{}".format(table_id) - ): - logging.debug( - "Table test.many_consumers_{} is not yet ready".format(table_id) - ) - time.sleep(0.5) - - i = [0] - messages_num = 1000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "many_consumers", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - result1 = "" - while True: - result1 = instance.query("SELECT count() FROM test.destination") - time.sleep(1) - if int(result1) == messages_num * threads_num: - break - - for thread in threads: - thread.join() - - for consumer_id in range(num_tables): - instance.query( - """ - DROP TABLE test.many_consumers_{0}; - DROP TABLE test.many_consumers_{0}_mv; - """.format( - consumer_id - ) - ) - - instance.query( - """ - DROP TABLE test.destination; - """ - ) - - assert ( - int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result1) - - -def test_nats_restore_failed_connection_without_losses_on_write(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.consume; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE TABLE test.consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_num_consumers = 2, - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.consume; - DROP TABLE IF EXISTS test.producer_reconnect; - CREATE TABLE test.producer_reconnect (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.consume"): - logging.debug("Table test.consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.producer_reconnect VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - while int(instance.query("SELECT count() FROM test.view")) == 0: - time.sleep(0.1) - - kill_nats(nats_cluster.nats_docker_id) - time.sleep(4) - revive_nats(nats_cluster.nats_docker_id, nats_cluster.nats_port) - - while True: - result = instance.query("SELECT count(DISTINCT key) FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consume; - DROP TABLE test.producer_reconnect; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_no_connection_at_startup_1(nats_cluster): - # no connection when table is initialized - nats_cluster.pause_container("nats1") - instance.query_and_get_error( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - """ - ) - nats_cluster.unpause_container("nats1") - - -def test_nats_no_connection_at_startup_2(nats_cluster): - instance.query( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.cs; - """ - ) - - instance.query("DETACH TABLE test.cs") - nats_cluster.pause_container("nats1") - instance.query("ATTACH TABLE test.cs") - nats_cluster.unpause_container("nats1") - while not check_table_is_ready(instance, "test.cs"): - logging.debug("Table test.cs is not yet ready") - time.sleep(0.5) - - messages_num = 1000 - messages = [] - for i in range(messages_num): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "cs", messages)) - - for _ in range(20): - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.cs; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_format_factory_settings(nats_cluster): - instance.query( - """ - CREATE TABLE test.format_settings ( - id String, date DateTime - ) ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'format_settings', - nats_format = 'JSONEachRow', - date_time_input_format = 'best_effort'; - """ - ) - while not check_table_is_ready(instance, "test.format_settings"): - logging.debug("Table test.format_settings is not yet ready") - time.sleep(0.5) - - message = json.dumps( - {"id": "format_settings_test", "date": "2021-01-19T14:42:33.1829214Z"} - ) - expected = instance.query( - """SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))""" - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - - while True: - result = instance.query("SELECT date FROM test.format_settings") - if result == expected: - break - - instance.query( - """ - CREATE TABLE test.view ( - id String, date DateTime - ) ENGINE = MergeTree ORDER BY id; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.format_settings; - """ - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - while True: - result = instance.query("SELECT date FROM test.view") - if result == expected: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.format_settings; - """ - ) - - assert result == expected - - -def test_nats_bad_args(nats_cluster): - instance.query_and_get_error( - """ - CREATE TABLE test.drop (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_secure = true, - nats_format = 'JSONEachRow'; - """ - ) - - -def test_nats_drop_mv(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(20): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(20, 40): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - messages = [] - for i in range(40, 50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(50, 60): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - count = 0 - while True: - count = int(instance.query("SELECT count() FROM test.nats")) - if count: - break - - assert count > 0 - - -def test_nats_predefined_configuration(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS(nats1) """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run( - nats_produce_messages( - nats_cluster, "named", [json.dumps({"key": 1, "value": 2})] - ) - ) - while True: - result = instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if result == "1\t2\n": - break - - -def test_format_with_prefix_and_suffix(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom', - nats_format = 'CustomSeparated'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - "INSERT INTO test.nats select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n\n\n10\t100\n\n" - ) - - -def test_max_rows_per_message(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom1', - nats_format = 'CustomSeparated', - nats_max_rows_per_message = 3, - format_custom_result_before_delimiter = '\n', - format_custom_result_after_delimiter = '\n'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - num_rows = 5 - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom1") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" - ) - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view") - assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" - - -def test_row_based_formats(nats_cluster): - num_rows = 10 - - for format_name in [ - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNamesAndTypes", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(2) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows})" - ) - - thread.join() - - assert insert_messages == 2 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - - result = instance.query("SELECT * FROM test.view") - assert result == expected - - -def test_block_based_formats_1(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'PrettySpace', - nats_format = 'PrettySpace'; - """ - ) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("PrettySpace") - await sub.unsubscribe(3) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - attempt = 0 - while attempt < 100: - try: - instance.query( - "INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" - ) - break - except Exception: - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - attempt += 1 - thread.join() - - data = [] - for message in insert_messages: - splitted = message.split("\n") - assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" - assert splitted[1] == "" - assert splitted[-1] == "" - data += [line.split() for line in splitted[2:-1]] - - assert data == [ - ["0", "0"], - ["10", "100"], - ["20", "200"], - ["30", "300"], - ["40", "400"], - ] - - -def test_block_based_formats_2(nats_cluster): - num_rows = 100 - - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(9) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;" - ) - - thread.join() - - assert insert_messages == 9 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view ORDER by key") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_nats/test_nats_json.reference b/tests/integration/test_storage_nats/test_nats_json.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_nats/test_nats_json.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 686eb1ea751..11729a5ab18 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -26,6 +26,10 @@ def started_cluster(): cluster.start() node1.query("CREATE DATABASE test") node2.query("CREATE DATABASE test") + # Wait for the PostgreSQL handler to start. + # cluster.start waits until port 9000 becomes accessible. + # Server opens the PostgreSQL compatibility port a bit later. + node1.wait_for_log_line("PostgreSQL compatibility protocol") yield cluster finally: @@ -726,6 +730,22 @@ def test_auto_close_connection(started_cluster): assert count == 2 +def test_literal_escaping(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute(f"DROP TABLE IF EXISTS escaping") + cursor.execute(f"CREATE TABLE escaping(text varchar(255))") + node1.query( + "CREATE TABLE default.escaping (text String) ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'escaping', 'postgres', 'mysecretpassword')" + ) + node1.query("SELECT * FROM escaping WHERE text = ''''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text like '%a''a%'") # %a'a% -> %a''a% + node1.query("SELECT * FROM escaping WHERE text like '%a\\'a%'") # %a'a% -> %a''a% + cursor.execute(f"DROP TABLE escaping") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml b/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml deleted file mode 100644 index 27c7107ce5e..00000000000 --- a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py deleted file mode 100644 index 66495700102..00000000000 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ /dev/null @@ -1,780 +0,0 @@ -import pytest - -# FIXME Tests with MaterializedPostgresSQL are temporarily disabled -# https://github.com/ClickHouse/ClickHouse/issues/36898 -# https://github.com/ClickHouse/ClickHouse/issues/38677 -# https://github.com/ClickHouse/ClickHouse/pull/39272#issuecomment-1190087190 - -pytestmark = pytest.mark.skip - -import time -import psycopg2 -import os.path as p - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT -from helpers.test_tools import TSV - -import threading - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/log_conf.xml"], - with_postgres=True, - stay_alive=True, -) - -postgres_table_template = """ - CREATE TABLE IF NOT EXISTS {} ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ - -queries = [ - "INSERT INTO {} select i, i from generate_series(0, 10000) as t(i);", - "DELETE FROM {} WHERE (value*value) % 3 = 0;", - "UPDATE {} SET value = value + 125 WHERE key % 2 = 0;", - "UPDATE {} SET key=key+20000 WHERE key%2=0", - "INSERT INTO {} select i, i from generate_series(40000, 50000) as t(i);", - "DELETE FROM {} WHERE key % 10 = 0;", - "UPDATE {} SET value = value + 101 WHERE key % 2 = 1;", - "UPDATE {} SET key=key+80000 WHERE key%2=1", - "DELETE FROM {} WHERE value % 2 = 0;", - "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;", - "INSERT INTO {} select i, i from generate_series(200000, 250000) as t(i);", - "DELETE FROM {} WHERE value % 3 = 0;", - "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;", - "UPDATE {} SET key=key+500000 WHERE key%2=1", - "INSERT INTO {} select i, i from generate_series(1000000, 1050000) as t(i);", - "DELETE FROM {} WHERE value % 9 = 2;", - "UPDATE {} SET key=key+10000000", - "UPDATE {} SET value = value + 2 WHERE key % 3 = 1;", - "DELETE FROM {} WHERE value%5 = 0;", -] - - -@pytest.mark.timeout(30) -def check_tables_are_synchronized( - table_name, order_by="key", postgres_database="postgres_database" -): - while True: - expected = instance.query( - "select * from {}.{} order by {};".format( - postgres_database, table_name, order_by - ) - ) - result = instance.query( - "select * from test.{} order by {};".format(table_name, order_by) - ) - if result == expected: - break - - assert result == expected - - -def get_postgres_conn( - ip, port, database=False, auto_commit=True, database_name="postgres_database" -): - if database == True: - conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format( - ip, port, database_name - ) - else: - conn_string = ( - "host={} port={} user='postgres' password='mysecretpassword'".format( - ip, port - ) - ) - - conn = psycopg2.connect(conn_string) - if auto_commit: - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - - -def create_postgres_db(cursor, name): - cursor.execute("CREATE DATABASE {}".format(name)) - - -def create_clickhouse_postgres_db(ip, port, name="postgres_database"): - instance.query( - """ - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')""".format( - name, ip, port, name - ) - ) - - -def create_materialized_table(ip, port, table_name="postgresql_replica"): - instance.query( - f""" - CREATE TABLE test.{table_name} (key Int64, value Int64) - ENGINE = MaterializedPostgreSQL( - '{ip}:{port}', 'postgres_database', '{table_name}', 'postgres', 'mysecretpassword') - PRIMARY KEY key; """ - ) - - -def create_postgres_table(cursor, table_name, replica_identity_full=False): - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) - cursor.execute(postgres_table_template.format(table_name)) - if replica_identity_full: - cursor.execute("ALTER TABLE {} REPLICA IDENTITY FULL;".format(table_name)) - - -def postgresql_replica_check_result( - result, check=False, ref_file="test_postgresql_replica.reference" -): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - conn = get_postgres_conn(ip=cluster.postgres_ip, port=cluster.postgres_port) - cursor = conn.cursor() - create_postgres_db(cursor, "postgres_database") - create_clickhouse_postgres_db( - ip=cluster.postgres_ip, port=cluster.postgres_port - ) - - instance.query("CREATE DATABASE test") - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.timeout(320) -def test_initial_load_from_snapshot(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_no_connection_at_startup(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - time.sleep(3) - - instance.query("DETACH TABLE test.postgresql_replica") - started_cluster.pause_container("postgres1") - - instance.query("ATTACH TABLE test.postgresql_replica") - time.sleep(3) - started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_detach_attach_is_ok(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - postgresql_replica_check_result(result, True) - - instance.query("DETACH TABLE test.postgresql_replica") - instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_insert_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 10 + number, 10 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 20 + number, 20 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 30: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 30 + number, 30 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 40 + number, 40 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_delete_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_replicating_update_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_resume_from_written_version(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query("DETACH TABLE test.postgresql_replica") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_many_replication_messages(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100000: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("SYNC OK") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000, 100000)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 200000: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("INSERT OK") - - result = instance.query("SELECT key FROM test.postgresql_replica ORDER BY key;") - expected = instance.query("SELECT number from numbers(200000)") - assert result == expected - - cursor.execute("UPDATE postgresql_replica SET value = key + 1 WHERE key < 100000;") - - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - expected = instance.query("SELECT number from numbers(100000)") - - while result != expected: - time.sleep(1) - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - print("UPDATE OK") - - cursor.execute("DELETE FROM postgresql_replica WHERE key % 2 = 1;") - cursor.execute("DELETE FROM postgresql_replica WHERE key != value;") - - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - while int(result) != 50000: - time.sleep(1) - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - print("DELETE OK") - - cursor.execute("DROP TABLE postgresql_replica;") - - -@pytest.mark.timeout(320) -def test_connection_loss(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print(int(result)) - time.sleep(6) - - started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - assert int(result) == 100050 - - -@pytest.mark.timeout(320) -def test_clickhouse_restart(started_cluster): - pytest.skip("Temporary disabled (FIXME)") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - instance.restart_clickhouse() - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - print(result) - assert int(result) == 100050 - - -def test_rename_table(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 25: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "RENAME TABLE test.postgresql_replica TO test.postgresql_replica_renamed" - ) - assert ( - int(instance.query("SELECT count() FROM test.postgresql_replica_renamed;")) - == 25 - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25, 25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - while int(result) != 50: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - - result = instance.query( - "SELECT * FROM test.postgresql_replica_renamed ORDER BY key;" - ) - postgresql_replica_check_result(result, True) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query("DROP TABLE IF EXISTS test.postgresql_replica_renamed") - - -def test_virtual_columns(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. - result = instance.query( - "SELECT key, value, _sign, _version FROM test.postgresql_replica;" - ) - print(result) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_connection_loss_while_heavy_replication(started_cluster): - instance.query("DROP DATABASE IF EXISTS test_database") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica" - create_postgres_table(cursor, table_name) - - instance.query(f"DROP TABLE IF EXISTS test.{table_name}") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - for i in range(len(queries)): - query = queries[i].format(table_name) - cursor.execute(query) - print("query {}".format(query.format(table_name))) - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - - started_cluster.unpause_container("postgres1") - - check_tables_are_synchronized("postgresql_replica") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled - if instance.is_built_with_sanitizer(): - pytest.skip("Temporary disabled (FIXME)") - - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica_697" - create_postgres_table(cursor, table_name) - - instance.query(f"INSERT INTO postgres_database.{table_name} SELECT -1, 1") - instance.query(f"DROP TABLE IF EXISTS test.{table_name} SYNC") - create_materialized_table( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - table_name=table_name, - ) - - n = 1 - while int(instance.query(f"select count() from test.{table_name}")) != 1: - sleep(1) - n += 1 - if n > 10: - break - - for query in queries: - cursor.execute(query.format(table_name)) - print("query {}".format(query.format(table_name))) - - instance.restart_clickhouse() - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - - check_tables_are_synchronized(table_name) - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - instance.query(f"DROP TABLE test.{table_name} SYNC") - - -def test_drop_table_immediately(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - instance.query("DROP TABLE test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - check_tables_are_synchronized("postgresql_replica") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference b/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 751279f5e5a..80d2050b394 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -511,69 +511,6 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): rabbitmq_check_result(result2, True) -@pytest.mark.skip(reason="clichouse_path with rabbitmq.proto fails to be exported") -def test_rabbitmq_protobuf(rabbitmq_cluster): - instance.query( - """ - CREATE TABLE test.rabbitmq (key UInt64, value String) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_exchange_name = 'pb', - rabbitmq_format = 'Protobuf', - rabbitmq_flush_interval_ms=1000, - rabbitmq_max_block_size=100, - rabbitmq_schema = 'rabbitmq.proto:KeyValueProto'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.rabbitmq; - """ - ) - - credentials = pika.PlainCredentials("root", "clickhouse") - parameters = pika.ConnectionParameters( - rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - - data = "" - for i in range(0, 20): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(20, 21): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(21, 50): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - - connection.close() - - result = "" - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if rabbitmq_check_result(result): - break - - rabbitmq_check_result(result, True) - - def test_rabbitmq_big_message(rabbitmq_cluster): # Create batchs of messages of size ~100Kb rabbitmq_messages = 1000 diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 0e6fb21481e..9fb794648cb 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) instance = started_cluster.instances["dummy"] # has named collection access @@ -1827,3 +1827,66 @@ def test_skip_empty_files(started_cluster): ) assert len(res.strip()) == 0 + + +def test_read_subcolumns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\t/root/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) diff --git a/tests/integration/test_storage_s3_queue/__init__.py b/tests/integration/test_storage_s3_queue/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml new file mode 100644 index 00000000000..7dac6d9fbb5 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml @@ -0,0 +1,11 @@ + + + + http://resolver:8080 +
Authorization: Bearer TOKEN
+
+ + http://resolver:8080/root-with-auth/restricteddirectory/ + +
+
diff --git a/tests/integration/test_storage_s3_queue/configs/named_collections.xml b/tests/integration/test_storage_s3_queue/configs/named_collections.xml new file mode 100644 index 00000000000..64674e2a3e3 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/named_collections.xml @@ -0,0 +1,43 @@ + + + + http://minio1:9001/root/test_table + minio + minio123 + + + http://minio1:9001/root/test_parquet + minio + minio123 + + + http://minio1:9001/root/test_parquet_gz + minio + minio123 + + + http://minio1:9001/root/test_orc + minio + minio123 + + + http://minio1:9001/root/test_native + minio + minio123 + + + http://minio1:9001/root/test.arrow + minio + minio123 + + + http://minio1:9001/root/test.parquet + minio + minio123 + + + http://minio1:9001/root/test_cache4.jsonl + true + + + diff --git a/tests/integration/test_storage_nats/configs/users.xml b/tests/integration/test_storage_s3_queue/configs/users.xml similarity index 100% rename from tests/integration/test_storage_nats/configs/users.xml rename to tests/integration/test_storage_s3_queue/configs/users.xml diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py new file mode 100644 index 00000000000..484ab6d7e95 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/test.py @@ -0,0 +1,879 @@ +import io +import logging +import os +import random +import time + +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +import json + +""" +export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-server +export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-client +export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-odbc-bridge +export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/home/sergey/vkr/ClickHouse/programs/server + +""" + + +def prepare_s3_bucket(started_cluster): + # Allows read-write access for bucket without authorization. + bucket_read_write_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:DeleteObject", + "Resource": "arn:aws:s3:::root/*", + }, + ], + } + + minio_client = started_cluster.minio_client + minio_client.set_bucket_policy( + started_cluster.minio_bucket, json.dumps(bucket_read_write_policy) + ) + + started_cluster.minio_restricted_bucket = "{}-with-auth".format( + started_cluster.minio_bucket + ) + if minio_client.bucket_exists(started_cluster.minio_restricted_bucket): + minio_client.remove_bucket(started_cluster.minio_restricted_bucket) + + minio_client.make_bucket(started_cluster.minio_restricted_bucket) + + +@pytest.fixture(autouse=True) +def s3_queue_setup_teardown(started_cluster): + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + + minio = started_cluster.minio_client + objects = list( + minio.list_objects(started_cluster.minio_restricted_bucket, recursive=True) + ) + for obj in objects: + minio.remove_object(started_cluster.minio_restricted_bucket, obj.object_name) + yield # run test + + +MINIO_INTERNAL_PORT = 9001 +AVAILABLE_MODES = ["unordered", "ordered"] +AUTH = "'minio','minio123'," + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +def put_s3_file_content(started_cluster, bucket, filename, data): + buf = io.BytesIO(data) + started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) + + +def generate_random_files( + count, prefix, cluster, bucket, column_num=3, row_num=10, start_ind=0 +): + total_values = [] + to_generate = [ + (f"{prefix}/test_{i}.csv", i) for i in range(start_ind, start_ind + count) + ] + to_generate.sort(key=lambda x: x[0]) + + for filename, i in to_generate: + rand_values = [ + [random.randint(0, 50) for _ in range(column_num)] for _ in range(row_num) + ] + total_values += rand_values + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + put_s3_file_content(cluster, bucket, filename, values_csv) + return total_values + + +# Returns content of given S3 file as string. +def get_s3_file_content(started_cluster, bucket, filename, decode=True): + # type: (ClickHouseCluster, str, str, bool) -> str + + data = started_cluster.minio_client.get_object(bucket, filename) + data_str = b"" + for chunk in data.stream(): + data_str += chunk + if decode: + return data_str.decode() + return data_str + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "instance", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + cluster.add_instance( + "instance2", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + prepare_s3_bucket(cluster) + yield cluster + finally: + cluster.shutdown() + + +def run_query(instance, query, stdin=None, settings=None): + # type: (ClickHouseInstance, str, object, dict) -> str + + logging.info("Running query '{}'...".format(query)) + result = instance.query(query, stdin=stdin, settings=settings) + logging.info("Query finished") + + return result + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_delete_after_processing(started_cluster, mode): + prefix = "delete" + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(5, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_delete_{mode}', + s3queue_loading_retries = 3, + after_processing='delete'; + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == total_values + minio = started_cluster.minio_client + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_failed_retry(started_cluster, mode): + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + values = [ + ["failed", 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_failed_retry_{mode}', + s3queue_loading_retries = 3; + """ + ) + + # first try + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # second try + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # upload correct file + values = [ + [1, 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_file(started_cluster, mode): + auth = "'minio','minio123'," + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + values = [ + [12549, 2463, 19893], + [64021, 38652, 66703], + [81611, 39650, 83516], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + instance.query( + """ + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_2; + DROP TABLE IF EXISTS test.s3_queue_3; + """ + ) + + instance.query( + f""" + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + instance.query( + f""" + CREATE TABLE test.s3_queue_2 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with same zookeeper path + get_query = f"SELECT * FROM test.s3_queue_2" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with different zookeeper path + instance.query( + f""" + CREATE TABLE test.s3_queue_3 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path='/clickhouse/select_{mode}_2' + """ + ) + get_query = f"SELECT * FROM test.s3_queue_3" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + values = [ + [1, 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"t.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue_3" + if mode == "unordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == values + elif mode == "ordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_multiple_files(started_cluster, mode): + prefix = f"multiple_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + instance.query("drop table if exists test.s3_queue") + instance.query( + f""" + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_multiple_{mode}' + """ + ) + + for i in range(5): + rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] + + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + filename = f"{prefix}/test_{i}.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == rand_values + + total_values = generate_random_files( + 4, prefix, started_cluster, bucket, start_ind=5 + ) + get_query = f"SELECT * FROM test.s3_queue" + assert { + tuple(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + } == set([tuple(i) for i in total_values]) + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_view_(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(10, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + expected_values = set([tuple(i) for i in total_values]) + for i in range(10): + get_query = f"SELECT * FROM test.persistent_s3_queue_mv" + + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if selected_values != expected_values: + time.sleep(1) + else: + break + + assert selected_values == expected_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_many_views(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + retry_cnt = 10 + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_2; + DROP TABLE IF EXISTS test.s3_queue_persistent_3; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_2; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_3; + + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_3 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/multiple_view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_2 TO test.s3_queue_persistent_2 AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_3 TO test.s3_queue_persistent_3 AS + SELECT + * + FROM test.s3_queue; + """ + ) + total_values = generate_random_files(5, prefix, started_cluster, bucket) + expected_values = set([tuple(i) for i in total_values]) + + for i in range(retry_cnt): + retry = False + for get_query in [ + f"SELECT * FROM test.s3_queue_persistent", + f"SELECT * FROM test.s3_queue_persistent_2", + f"SELECT * FROM test.s3_queue_persistent_3", + ]: + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if i == retry_cnt - 1: + assert selected_values == expected_values + if selected_values != expected_values: + retry = True + break + if retry: + time.sleep(1) + else: + break + + +def test_multiple_tables_meta_mismatch(started_cluster): + prefix = f"test_meta" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + # check mode + failed = False + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in engine mode" in str(e) + failed = True + assert failed is True + + # check columns + table_format_copy = table_format + ", column4 UInt32" + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format_copy}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert ( + "Table columns structure in ZooKeeper is different from local table structure" + in str(e) + ) + failed = True + + assert failed is True + + # check format + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'TSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in format name" in str(e) + failed = True + assert failed is True + + # create working engine + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + + +def test_max_set_age(started_cluster): + files_to_generate = 10 + max_age = 1 + prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_age', + s3queue_tracked_files_limit = 10, + s3queue_tracked_file_ttl_sec = {max_age}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + time.sleep(max_age + 1) + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync(started_cluster, mode): + files_to_generate = 300 + poll_size = 30 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_copy; + DROP TABLE IF EXISTS test.s3_queue_copy_2; + + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy_2; + + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy_2; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy_2 ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_copy TO test.s3_queue_persistent_copy AS + SELECT + * + FROM test.s3_queue_copy; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_copy_2 TO test.s3_queue_persistent_copy_2 AS + SELECT + * + FROM test.s3_queue_copy_2; + """ + ) + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(table_name): + return int(run_query(instance, f"SELECT count() FROM {table_name}")) + + for _ in range(100): + if ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + get_query_copy = f"SELECT * FROM test.s3_queue_persistent_copy" + res2 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy).splitlines() + ] + get_query_copy_2 = f"SELECT * FROM test.s3_queue_persistent_copy_2" + res3 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy_2).splitlines() + ] + assert {tuple(v) for v in res1 + res2 + res3} == set( + [tuple(i) for i in total_values] + ) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): + files_to_generate = 100 + poll_size = 2 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + for inst in [instance, instance_2]: + inst.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + """ + ) + + for inst in [instance, instance_2]: + inst.query( + f""" + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(node, table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(150): + if ( + get_count(instance, "test.s3_queue_persistent") + + get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + res2 = [ + list(map(int, l.split())) for l in run_query(instance_2, get_query).splitlines() + ] + + assert len(res1) + len(res2) == files_to_generate + + # Checking that all engines have made progress + assert len(res1) > 0 + assert len(res2) > 0 + + assert {tuple(v) for v in res1 + res2} == set([tuple(i) for i in total_values]) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count(instance, "test.s3_queue_persistent") + + get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate + + +def test_max_set_size(started_cluster): + files_to_generate = 10 + prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_size', + s3queue_tracked_files_limit = {files_to_generate - 1}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, start_ind=0, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[0]] + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[1]] diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 7f359078967..7ff7a871413 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -68,13 +68,13 @@ def test_url_cluster_with_named_collection(): def test_table_function_url_access_rights(): node1.query("CREATE USER OR REPLACE u1") - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", user="u1", ) - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" ) @@ -89,7 +89,7 @@ def test_table_function_url_access_rights(): user="u1", ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - expected_error = "necessary to have grant URL ON *.*" + expected_error = "necessary to have the grant URL ON *.*" assert expected_error in node1.query_and_get_error( f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV')", user="u1" ) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index bf225ac30f8..084d342d736 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -45,7 +45,11 @@ def test_system_logs(flush_logs, table, exists): if exists: node.query(q) else: - assert "Table {} doesn't exist".format(table) in node.query_and_get_error(q) + response = node.query_and_get_error(q) + assert ( + "Table {} does not exist".format(table) in response + or "Unknown table expression identifier '{}'".format(table) in response + ) # Logic is tricky, let's check that there is no hang in case of message queue diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py index 1925685af03..8a3081e0c15 100644 --- a/tests/integration/test_system_start_stop_listen/test.py +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -143,3 +143,73 @@ def test_all_protocols(started_cluster): backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") assert_everything_works() + + +def test_except(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES ALL EXCEPT MYSQL, CUSTOM 'tcp'") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(MYSQL_QUERY) + assert http_works() == False + assert http_works(8124) == False + + # START LISTEN QUERIES ALL EXCEPT + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL EXCEPT TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(MYSQL_QUERY) + assert http_works() == True + assert http_works(8124) == True + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() + + # STOP LISTEN QUERIES DEFAULT EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT EXCEPT TCP") + main_node.query(QUERY) + assert "Connections to mysql failed" in custom_client.query_and_get_error( + MYSQL_QUERY + ) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) == True + + # START LISTEN QUERIES DEFAULT EXCEPT + backup_node.query( + "SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT EXCEPT HTTP" + ) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) == True + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() + + # STOP LISTEN QUERIES CUSTOM EXCEPT + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM EXCEPT CUSTOM 'tcp'") + main_node.query(QUERY) + custom_client.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() == True + assert http_works(8124) == False + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + + # START LISTEN QUERIES DEFAULT EXCEPT + backup_node.query( + "SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM EXCEPT CUSTOM 'tcp'" + ) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == True + assert http_works(8124) == True + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index 09a05122c07..b1d1a291bc5 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -40,7 +40,7 @@ def test_merge(): instance.query("CREATE USER A") assert ( - "it's necessary to have grant CREATE TEMPORARY TABLE ON *.*" + "it's necessary to have the grant CREATE TEMPORARY TABLE ON *.*" in instance.query_and_get_error(select_query, user="A") ) @@ -62,7 +62,7 @@ def test_merge(): instance.query("GRANT SELECT ON default.table1 TO A") instance.query("GRANT INSERT ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT ON default.table2" + "it's necessary to have the grant SELECT ON default.table2" in instance.query_and_get_error(select_query, user="A") ) diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py index e6142c7eef1..ed06a70cf5a 100644 --- a/tests/integration/test_temporary_data_in_cache/test.py +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -2,6 +2,7 @@ # pylint: disable=redefined-outer-name import pytest +import fnmatch from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException @@ -68,7 +69,9 @@ def test_cache_evicted_by_temporary_data(start_cluster): "max_bytes_before_external_sort": "4M", }, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value # Some data evicted from cache by temporary data cache_size_after_eviction = get_cache_size() @@ -104,6 +107,8 @@ def test_cache_evicted_by_temporary_data(start_cluster): "SELECT randomPrintableASCII(1024) FROM numbers(32 * 1024) FORMAT TSV", params={"buffer_size": 0, "wait_end_of_query": 1}, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value q("DROP TABLE IF EXISTS t1") diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index 62640394a85..a27bb472ea8 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -117,7 +117,8 @@ def assert_took(took, should_took): # we need to decrease the lower limit because the server limits could # be enforced by throttling some server background IO instead of query IO # and we have no control over it - assert took >= should_took[0] * 0.85 and took < should_took[1] + # Note that throttler does not apply any restrictions on upper bound, so we can only tell how much time required "at least", not "at most" + assert took >= should_took * 0.85 @pytest.mark.parametrize( @@ -132,7 +133,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -142,7 +143,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -152,7 +153,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # @@ -164,7 +165,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -174,7 +175,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_remote_to_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -184,7 +185,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_to_local_throttling", ), # @@ -196,7 +197,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -206,7 +207,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (0, 3), + 0, id="user_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -216,7 +217,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (0, 3), + 0, id="server_remote_to_remote_throttling", ), # @@ -233,7 +234,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_to_remote_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds, but for S3Client it is 2x more @@ -243,7 +244,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7 * 3, 7 * 4 - 1), + 7 * 3, id="user_local_to_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds, but for S3Client it is 2x more @@ -253,7 +254,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3 * 3, 3 * 5), + 3 * 3, id="server_local_to_remote_throttling", ), ], @@ -290,7 +291,7 @@ def test_backup_throttling_override(): }, ) # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds - assert_took(took, (15, 20)) + assert_took(took, 15) @pytest.mark.parametrize( @@ -299,14 +300,14 @@ def test_backup_throttling_override(): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_read_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -315,20 +316,20 @@ def test_backup_throttling_override(): "server", "max_local_read_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", "max_remote_read_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -337,7 +338,7 @@ def test_backup_throttling_override(): "server", "max_remote_read_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", ), ], @@ -361,14 +362,14 @@ def test_read_throttling(policy, mode, setting, value, should_took): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_write_bandwidth", "1M", - (7, 14), + 7, id="local_user_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -377,29 +378,29 @@ def test_read_throttling(policy, mode, setting, value, should_took): "server", "max_local_write_bandwidth_for_server", "2M", - (3, 7), + 3, id="local_server_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), - # writeing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), + # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", "max_remote_write_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), - # writeing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds pytest.param( "s3", "server", "max_remote_write_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", ), ], diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index a12d30915dd..46660581223 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -105,8 +105,6 @@ def test_rollback_unfinished_on_restart1(start_cluster): "0_4_4_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "0_8_8_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" - "1_1_1_1_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" @@ -194,6 +192,5 @@ def test_rollback_unfinished_on_restart2(start_cluster): "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "0_5_5_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" ) diff --git a/tests/integration/test_truncate_database/__init__.py b/tests/integration/test_truncate_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_truncate_database/configs/distributed_servers.xml b/tests/integration/test_truncate_database/configs/distributed_servers.xml new file mode 100644 index 00000000000..68b420f36b4 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/distributed_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/configs/replicated_servers.xml b/tests/integration/test_truncate_database/configs/replicated_servers.xml new file mode 100644 index 00000000000..8e318d385c3 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/replicated_servers.xml @@ -0,0 +1,21 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/test_distributed.py b/tests/integration/test_truncate_database/test_distributed.py new file mode 100644 index 00000000000..fed3d16d190 --- /dev/null +++ b/tests/integration/test_truncate_database/test_distributed.py @@ -0,0 +1,53 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["configs/distributed_servers.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/distributed_servers.xml"] +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node1, node2): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id + """ + ) + + node1.query("INSERT INTO test.local_table VALUES (1, 'node1')") + node2.query("INSERT INTO test.local_table VALUES (2, 'node2')") + + node1.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + node2.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + + yield cluster + + finally: + cluster.shutdown() + + +def test_truncate_database_distributed(started_cluster): + query1 = "SELECT count() FROM test.distributed WHERE (id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))" + query2 = "SELECT sum((id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))) FROM test.distributed" + assert node1.query(query1) == "1\n" + assert node1.query(query2) == "1\n" + assert node2.query(query1) == "1\n" + assert node2.query(query2) == "1\n" + assert node2.query("SHOW DATABASES LIKE 'test'") == "test\n" + node1.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_truncate_database/test_replicated.py b/tests/integration/test_truncate_database/test_replicated.py new file mode 100644 index 00000000000..59830d44378 --- /dev/null +++ b/tests/integration/test_truncate_database/test_replicated.py @@ -0,0 +1,56 @@ +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +def fill_nodes(nodes, shard): + for node in nodes: + node.query( + """ + CREATE DATABASE test; + + CREATE TABLE test.test_table(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date); + """.format( + shard=shard, replica=node.name + ) + ) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node3 = cluster.add_instance( + "node3", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes([node1, node2, node3], 1) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_truncate_database_replicated(start_cluster): + node1.query( + "INSERT INTO test.test_table SELECT number, toString(number) FROM numbers(100)" + ) + assert node2.query("SELECT id FROM test.test_table LIMIT 1") == "0\n" + assert node3.query("SHOW DATABASES LIKE 'test'") == "test\n" + node3.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index a2f28e21666..c1c076277bb 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1529,106 +1529,6 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) -@pytest.mark.skip(reason="Flacky test") -@pytest.mark.parametrize( - "name,positive", - [ - pytest.param("test_double_move_while_select_negative", 0, id="negative"), - pytest.param("test_double_move_while_select_positive", 1, id="positive"), - ], -) -def test_double_move_while_select(started_cluster, name, positive): - name = unique_table_name(name) - - try: - node1.query( - """ - CREATE TABLE {name} ( - n Int64, - s String - ) ENGINE = MergeTree - ORDER BY tuple() - PARTITION BY n - SETTINGS storage_policy='small_jbod_with_external',temporary_directories_lifetime=1 - """.format( - name=name - ) - ) - - node1.query( - "INSERT INTO {name} VALUES (1, randomPrintableASCII(10*1024*1024))".format( - name=name - ) - ) - - parts = node1.query( - "SELECT name FROM system.parts WHERE table = '{name}' AND active = 1".format( - name=name - ) - ).splitlines() - assert len(parts) == 1 - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format( - name=name, part=parts[0] - ) - ) - - def long_select(): - if positive: - node1.query( - "SELECT sleep(3), sleep(2), sleep(1), n FROM {name}".format( - name=name - ) - ) - - thread = threading.Thread(target=long_select) - thread.start() - - time.sleep(1) - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'jbod1'".format( - name=name, part=parts[0] - ) - ) - - # Fill jbod1 to force ClickHouse to make move of partition 1 to external. - node1.query( - "INSERT INTO {name} VALUES (2, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (3, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (4, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - - wait_parts_mover(node1, name, retry_count=40) - - # If SELECT locked old part on external, move shall fail. - assert node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{name}' AND active = 1 AND name = '{part}'".format( - name=name, part=parts[0] - ) - ).splitlines() == ["jbod1" if positive else "external"] - - thread.join() - - assert node1.query( - "SELECT n FROM {name} ORDER BY n".format(name=name) - ).splitlines() == ["1", "2", "3", "4"] - - finally: - node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) - - @pytest.mark.parametrize( "name,engine,positive", [ diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 3b031569b8a..117ebe37dd2 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -23,6 +23,7 @@ node4 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node5 = cluster.add_instance( @@ -35,6 +36,7 @@ node5 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +48,7 @@ node6 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) diff --git a/tests/integration/test_user_directories/test.py b/tests/integration/test_user_directories/test.py index 45afb86f464..704fb30b2fd 100644 --- a/tests/integration/test_user_directories/test.py +++ b/tests/integration/test_user_directories/test.py @@ -38,14 +38,14 @@ def test_old_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users2.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access2\\\\/"}', 2, ], @@ -62,20 +62,20 @@ def test_local_directories(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users3.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3\\\\/"}', 2, ], [ "local directory (ro)", - "local directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3-ro\\\\/","readonly":true}', 3, ], @@ -92,8 +92,8 @@ def test_relative_path(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users4.xml"}', 1, ] @@ -110,8 +110,8 @@ def test_memory(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users5.xml"}', 1, ], @@ -129,20 +129,20 @@ def test_mixed_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users6.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6\\\\/"}', 2, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6a\\\\/"}', 3, ], @@ -160,14 +160,14 @@ def test_duplicates(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users7.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access7\\\\/"}', 2, ], diff --git a/tests/integration/test_version_update/test.py b/tests/integration/test_version_update/test.py index 3332fe69e86..b8fa3e7ebb4 100644 --- a/tests/integration/test_version_update/test.py +++ b/tests/integration/test_version_update/test.py @@ -15,6 +15,7 @@ node2 = cluster.add_instance( tag="21.2", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) # Use differents nodes because if there is node.restart_from_latest_version(), then in later tests @@ -25,6 +26,7 @@ node3 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -32,6 +34,7 @@ node4 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node5 = cluster.add_instance( "node5", @@ -39,6 +42,7 @@ node5 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +50,7 @@ node6 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml deleted file mode 100644 index 7a00648b28e..00000000000 --- a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 416220c93c3..f3ae190ee46 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -16,6 +16,7 @@ node1 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -27,6 +28,7 @@ node2 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node3 = cluster.add_instance( "node3", @@ -38,6 +40,7 @@ node3 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) @@ -51,12 +54,6 @@ def start_cluster(): cluster.shutdown() -def restart_node(node): - # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old) - node.put_users_config("configs/force_remove_data_recursively_on_drop.xml") - node.restart_with_latest_version(signal=9, fix_metadata=True) - - def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: node.query("DROP TABLE IF EXISTS mt") @@ -73,9 +70,10 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader - - restart_node(node1) - restart_node(node2) + node1.query("SYSTEM FLUSH LOGS") + node2.query("SYSTEM FLUSH LOGS") + node1.restart_with_latest_version(signal=9, fix_metadata=True) + node2.restart_with_latest_version(signal=9, fix_metadata=True) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -131,7 +129,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - restart_node(node3) + node3.restart_with_latest_version(signal=9, fix_metadata=True) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) diff --git a/tests/integration/test_wrong_db_or_table_name/__init__.py b/tests/integration/test_wrong_db_or_table_name/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_wrong_db_or_table_name/test.py b/tests/integration/test_wrong_db_or_table_name/test.py new file mode 100644 index 00000000000..68af383b6c3 --- /dev/null +++ b/tests/integration/test_wrong_db_or_table_name/test.py @@ -0,0 +1,108 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def start(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("SELECT * FROM tes.table_test LIMIT 1;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("DROP DATABASE tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query("DROP DATABASE test;") + + +def test_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + CREATE TABLE test.table_test2 (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_test1 does not exist. Maybe you meant table_test?.", + ): + node.query( + """ + SELECT * FROM test.table_test1 LIMIT 1; + """ + ) + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP TABLE test.table_test2; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_tes does not exist. Maybe you meant table_test?.", + ): + node.query("DROP TABLE test.table_tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index 65f82c2286b..0c0f77ec597 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -2,6 +2,7 @@ import time import pytest import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_config_root_a.xml" @@ -56,10 +57,10 @@ def test_chroot_with_same_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) - time.sleep(1) - - assert node1.query("select count() from simple").strip() == "2" - assert node2.query("select count() from simple").strip() == "2" + node1.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node1, "select count() from simple", "2") + node2.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node2, "select count() from simple", "2") def test_chroot_with_different_root(started_cluster): @@ -76,5 +77,7 @@ def test_chroot_with_different_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple_different VALUES ({0}, {0})".format(i)) - assert node1.query("select count() from simple_different").strip() == "1" - assert node3.query("select count() from simple_different").strip() == "1" + node1.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node1, "select count() from simple_different", "1") + node3.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node3, "select count() from simple_different", "1") diff --git a/tests/integration/test_zookeeper_fallback_session/__init__.py b/tests/integration/test_zookeeper_fallback_session/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml new file mode 100644 index 00000000000..63fdcea5dab --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml @@ -0,0 +1,23 @@ + + + + + + node1 + 9000 + + + + node2 + 9000 + + + + node3 + 9000 + + + + + + diff --git a/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml new file mode 100644 index 00000000000..b0844ab4e73 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml @@ -0,0 +1,23 @@ + + + + in_order + + 2 + 4 + + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 500 + + diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py new file mode 100644 index 00000000000..570eca4f0a6 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -0,0 +1,101 @@ +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager + + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +node1 = cluster.add_instance( + "node1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node2 = cluster.add_instance( + "node2", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for node in [node1, node2, node3]: + node.query("DROP TABLE IF EXISTS simple SYNC") + node.query( + """ + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id; + """.format( + replica=node.name + ) + ) + yield cluster + finally: + cluster.shutdown() + + +def assert_uses_zk_node(node: ClickHouseInstance, zk_node): + def check_callback(host): + return host.strip() == expected_zk_ip_addr + + expected_zk_ip_addr = node.cluster.get_instance_ip(zk_node) + + host = node.query_with_retry( + "select host from system.zookeeper_connection", check_callback=check_callback + ) + assert host.strip() == expected_zk_ip_addr + + +def test_fallback_session(started_cluster: ClickHouseCluster): + # only leave connecting to zoo3 possible + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + for zk in ["zoo1", "zoo2"]: + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip(zk), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + # all nodes will have to switch to zoo3 + assert_uses_zk_node(node, "zoo3") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(1)) + + # and replication still works + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "1", + ) + == "1\n" + ) + + # at this point network partitioning has been reverted. + # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. + # otherwise they would connect to a random replica + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "2", + ) + == "2\n" + ) diff --git a/tests/performance/README.md b/tests/performance/README.md index c0c055bba97..f554e96203b 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -4,11 +4,11 @@ This directory contains `.xml`-files with performance tests for @akuzm tool. ### How to write performance test -First of all you should check existing tests don't cover your case. If there are no such tests than you should write your own. +First of all you should check existing tests don't cover your case. If there are no such tests then you should write your own. You can use `substitions`, `create`, `fill` and `drop` queries to prepare test. You can find examples in this folder. -If your test continued more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones. +If your test takes more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones. ### How to run performance test diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml new file mode 100644 index 00000000000..91763c69bb9 --- /dev/null +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -0,0 +1,32 @@ + + + 8 + 0 + 4 + + + + CREATE TABLE t_nullable + ( + key_string1 Nullable(String), + key_string2 Nullable(String), + key_string3 Nullable(String), + key_int64_1 Nullable(Int64), + key_int64_2 Nullable(Int64), + key_int64_3 Nullable(Int64), + key_int64_4 Nullable(Int64), + key_int64_5 Nullable(Int64), + m1 Int64, + m2 Int64 + ) + ENGINE = Memory + + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000) + select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 + select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2 + select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5 + select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10 + select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10 + + drop table if exists t_nullable + \ No newline at end of file diff --git a/tests/performance/lower_upper_function.xml b/tests/performance/lower_upper_function.xml deleted file mode 100644 index 1b84a334ace..00000000000 --- a/tests/performance/lower_upper_function.xml +++ /dev/null @@ -1,11 +0,0 @@ - - select lower(randomString(16)) - select lower(randomString(32)) - select lower(randomString(64)) - select lower(randomString(128)) - select lower(randomString(256)) - select lower(randomString(512)) - select lower(randomString(1024)) - select lower(randomString(832)) - select lower(randomString(416)) - diff --git a/tests/performance/parquet_filter.xml b/tests/performance/parquet_filter.xml new file mode 100644 index 00000000000..27bcb15ee5e --- /dev/null +++ b/tests/performance/parquet_filter.xml @@ -0,0 +1,9 @@ + + create table if not exists t (key UInt64, value String) engine = File(Parquet) settings output_format_parquet_use_custom_encoder=1, output_format_parquet_row_group_size=100000 + + insert into t select number, toString(number) from numbers(2000000) settings max_threads=16, max_insert_threads=16, max_insert_block_size=100000, max_block_size=100000 + + select sum(cityHash64(*)) from t where key between 1050000 and 1150000 settings max_threads=1 + + drop table if exists t + diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml new file mode 100644 index 00000000000..2fc63c2c926 --- /dev/null +++ b/tests/performance/storage_join_direct_join.xml @@ -0,0 +1,19 @@ + + + 1 + + + CREATE TABLE keys (key UInt64) ENGINE = MergeTree ORDER BY key; + CREATE TABLE dict (key UInt64, value1 UInt64, value2 Float64, value3 String, + value4 String, value5 String, value6 String, value7 String, value8 String, value9 String, + value10 String) ENGINE = Join(ANY, LEFT, key); + + INSERT INTO keys SELECT rand() FROM numbers(10000000); + INSERT INTO dict SELECT rand(), rand()%1000, rand()*0.0001, toString(number), + toString(number), toString(number), toString(number), toString(number), toString(number), + toString(number), toString(number) FROM numbers(1000000); + + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; + SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS + allow_experimental_analyzer=1 + \ No newline at end of file diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index 11b660b54a3..00a2cd14700 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -1,5 +1,6 @@ runtime messages 0.001 runtime exceptions 0.05 +unknown runtime exceptions 0.01 messages shorter than 10 1 messages shorter than 16 3 exceptions shorter than 30 3 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 86fe01dc0e3..c96018d8091 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -9,14 +9,20 @@ create view logs as select * from system.text_log where now() - toIntervalMinute -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 -select 'runtime messages', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs; +select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs; -- Check the same for exceptions. The value was 0.03 -select 'runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs where message like '%DB::Exception%'; +select 'runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs + where (message like '%DB::Exception%' or message like '%Coordination::Exception%') + and message not like '% Received from %clickhouse-staging.com:9440%'; + +select 'unknown runtime exceptions', max2(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.01) from logs where + (message like '%DB::Exception%' or message like '%Coordination::Exception%') + and message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'; -- FIXME some of the following messages are not informative and it has to be fixed create temporary table known_short_messages (s String) as select * from (select -['', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', +['', '{} ({})', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', 'Creating set.', 'Cyclic aliases', 'Detaching {}', 'Executing {}', 'Fire events: {}', 'Found part {}', 'Loaded queue', 'No sharding key', 'No tables', 'Query: {}', 'Removed', 'Removed part {}', 'Removing parts.', 'Request URI: {}', 'Sending part {}', @@ -32,11 +38,13 @@ create temporary table known_short_messages (s String) as select * from (select 'brotli decode error{}', 'Invalid H3 index: {}', 'Too large node state size', 'No additional keys found.', 'Attempt to read after EOF.', 'Replication was stopped', '{} building file infos', 'Cannot parse uuid {}', 'Query was cancelled', 'Cancelled merging parts', 'Cancelled mutating parts', 'Log pulling is cancelled', -'Transaction was cancelled', 'Could not find table: {}', 'Table {} doesn''t exist', -'Database {} doesn''t exist', 'Dictionary ({}) not found', 'Unknown table function {}', +'Transaction was cancelled', 'Could not find table: {}', 'Table {} does not exist', +'Database {} does not exist', 'Dictionary ({}) not found', 'Unknown table function {}', 'Unknown format {}', 'Unknown explain kind ''{}''', 'Unknown setting {}', 'Unknown input format {}', 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', -'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'API mode: {}' +'Attempt to read after eof', 'String size is too big ({}), maximum: {}', +'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', +'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. @@ -48,7 +56,7 @@ select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_st -- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.: -- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)" -- But we have to cut out the boilerplate, e.g.: --- "Code: 60. DB::Exception: Table default.a doesn't exist. (UNKNOWN_TABLE), Stack trace" -> "Table default.a doesn't exist." +-- "Code: 60. DB::Exception: Table default.a does not exist. (UNKNOWN_TABLE), Stack trace" -> "Table default.a does not exist." -- This table currently doesn't have enough information to do this reliably, so we just regex search for " (ERROR_NAME_IN_CAPS)" and hope that's good enough. -- For the "Code: 123. DB::Exception: " part, we just subtract 26 instead of searching for it. Because sometimes it's not at the start, e.g.: -- "Unexpected error, will try to restart main thread: Code: 341. DB::Exception: Unexpected error: Code: 57. DB::Exception:[...]" diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.sql b/tests/queries/0_stateless/00061_merge_tree_alter.sql index 2e46b1e16d6..f2a36d6e5a3 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.sql +++ b/tests/queries/0_stateless/00061_merge_tree_alter.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS alter_00061; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference b/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference index f0b6f0e0c41..e4690f20d3e 100644 --- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference +++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference @@ -2,8 +2,8 @@ A B numbers one -A 1 TinyLog CREATE TABLE test_show_tables.A (`A` UInt8) ENGINE = TinyLog -B 1 TinyLog CREATE TABLE test_show_tables.B (`A` UInt8) ENGINE = TinyLog +A 1 TinyLog CREATE TABLE default.A (`A` UInt8) ENGINE = TinyLog +B 1 TinyLog CREATE TABLE default.B (`A` UInt8) ENGINE = TinyLog test_temporary_table -['test_show_tables'] ['test_materialized'] +['default'] ['test_materialized'] 0 diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index 137dfb5b6f0..a58f9ddb0ac 100644 --- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -1,32 +1,27 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_show_tables; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; -CREATE DATABASE test_show_tables; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; -CREATE TABLE test_show_tables.A (A UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_tables.B (A UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.B (A UInt8) ENGINE = TinyLog; -SHOW TABLES from test_show_tables; +SHOW TABLES from {CLICKHOUSE_DATABASE:Identifier}; SHOW TABLES in system where engine like '%System%' and name in ('numbers', 'one'); -SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = 'test_show_tables' ORDER BY name FORMAT TSVRaw; +SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = currentDatabase() ORDER BY name FORMAT TSVRaw; CREATE TEMPORARY TABLE test_temporary_table (id UInt64); SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary_table'; -CREATE TABLE test_show_tables.test_log(id UInt64) ENGINE = Log; -CREATE MATERIALIZED VIEW test_show_tables.test_materialized ENGINE = Log AS SELECT * FROM test_show_tables.test_log; -SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log'; - -DROP DATABASE test_show_tables; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_log(id UInt64) ENGINE = Log; +CREATE MATERIALIZED VIEW {CLICKHOUSE_DATABASE:Identifier}.test_materialized ENGINE = Log AS SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_log; +SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' and database=currentDatabase(); +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; -- Check that create_table_query works for system tables and unusual Databases -DROP DATABASE IF EXISTS test_DatabaseMemory; -CREATE DATABASE test_DatabaseMemory ENGINE = Memory; -CREATE TABLE test_DatabaseMemory.A (A UInt8) ENGINE = Null; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = Null; -SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables WHERE database = 'test_DatabaseMemory'; - -DROP DATABASE test_DatabaseMemory; +SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables WHERE database = '{CLICKHOUSE_DATABASE:String}'; diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index 95d46032601..7d925bc4ff1 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00101_0; - -USE test_00101_0; DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; @@ -25,9 +20,9 @@ DROP TABLE test_view_filtered; -- Check only sophisticated constructors and desctructors: -CREATE DATABASE IF NOT EXISTS test_00101_1; +CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -USE test_00101_1; +USE {CLICKHOUSE_DATABASE_1:Identifier}; DROP TABLE IF EXISTS tmp; DROP TABLE IF EXISTS tmp_mv; @@ -57,5 +52,5 @@ EXISTS TABLE `.inner.tmp_mv4`; DROP TABLE tmp; -DROP DATABASE test_00101_0; -DROP DATABASE test_00101_1; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; diff --git a/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql b/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql index 23c157db875..1d988b38b05 100644 --- a/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql +++ b/tests/queries/0_stateless/00158_buffer_and_nonexistent_table.sql @@ -1,11 +1,10 @@ --- Tags: no-parallel -CREATE DATABASE IF NOT EXISTS test2_00158; -DROP TABLE IF EXISTS test2_00158.mt_buffer_00158; -DROP TABLE IF EXISTS test2_00158.mt_00158; -CREATE TABLE test2_00158.mt_buffer_00158 (d Date DEFAULT today(), x UInt64) ENGINE = Buffer(test2_00158, mt_00158, 16, 100, 100, 1000000, 1000000, 1000000000, 1000000000); +CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_00158; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (d Date DEFAULT today(), x UInt64) ENGINE = Buffer({CLICKHOUSE_DATABASE:Identifier}, mt_00158, 16, 100, 100, 1000000, 1000000, 1000000000, 1000000000); SET send_logs_level = 'fatal'; -- Supress "Destination table test2.mt doesn't exist. Block of data is discarded." -INSERT INTO test2_00158.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 100000; -INSERT INTO test2_00158.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 1000000; -DROP TABLE IF EXISTS test2_00158.mt_buffer_00158; -DROP DATABASE test2_00158; +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 100000; +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158 (x) SELECT number AS x FROM system.numbers LIMIT 1000000; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_buffer_00158; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference index cd4823e219f..ece1f5aa525 100644 --- a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference +++ b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.reference @@ -1,2 +1,4 @@ [(1,4),(2,5),(3,6)] [(1,4),(2,5),(3,6)] +[(1,4),(2,5),(3,6)] +[(1,4),(2,5),(3,6)] diff --git a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql index b28f4e65487..0189d3a63f5 100644 --- a/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql +++ b/tests/queries/0_stateless/00171_shard_array_of_tuple_remote.sql @@ -1,3 +1,4 @@ -- Tags: shard SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) FROM remote('127.0.0.{2,3}', system.one) ORDER BY rand(); +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) FROM remote('127.0.0.{2,3}') ORDER BY rand(); diff --git a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference index a594e1495c1..d3cd76be236 100644 --- a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference +++ b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.reference @@ -1,3 +1,6 @@ 1 1 +1 + +1 diff --git a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql index 80a35a4855a..a1e8d907a35 100644 --- a/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql +++ b/tests/queries/0_stateless/00220_shard_with_totals_in_subquery_remote_and_limit.sql @@ -1,3 +1,4 @@ -- Tags: shard SELECT x FROM (SELECT count() AS x FROM remote('127.0.0.2', system.one) WITH TOTALS) LIMIT 1; +SELECT x FROM (SELECT count() AS x FROM remote('127.0.0.2') WITH TOTALS) LIMIT 1; diff --git a/tests/queries/0_stateless/00284_external_aggregation.sql b/tests/queries/0_stateless/00284_external_aggregation.sql index d19f9f5aee8..c1140faaa28 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/tests/queries/0_stateless/00284_external_aggregation.sql @@ -13,13 +13,13 @@ SET group_by_two_level_threshold = 100000; SET max_bytes_before_external_group_by = '1Mi'; -- method: key_string & key_string_two_level -CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple(); +CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6); SELECT s, count() FROM t_00284_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; -- method: low_cardinality_key_string & low_cardinality_key_string_two_level -CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple(); +CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6); SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42; diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 80053c99a17..2e2e1384534 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo === Test input_format_csv_empty_as_default +echo '=== Test input_format_csv_empty_as_default' $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS csv"; $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; @@ -19,7 +19,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test datetime +echo '=== Test datetime' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" @@ -30,7 +30,7 @@ echo '"2016-01-01 01:02:03","1" $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test nullable datetime +echo '=== Test nullable datetime' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL @@ -41,7 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test ignore extra columns +echo '=== Test ignore extra columns' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo '"Hello", 1, "String1" @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test missing as default +echo '=== Test missing as default' $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; echo ' diff --git a/tests/queries/0_stateless/00417_kill_query.reference b/tests/queries/0_stateless/00417_kill_query.reference index 1a3b47964c0..0dd1d78ac82 100644 --- a/tests/queries/0_stateless/00417_kill_query.reference +++ b/tests/queries/0_stateless/00417_kill_query.reference @@ -1,2 +1,2 @@ -SELECT sleep(1) FROM system.numbers LIMIT 30 -SELECT sleep(1) FROM system.numbers LIMIT 31 +SELECT sleep(1) FROM system.numbers LIMIT 300 +SELECT sleep(1) FROM system.numbers LIMIT 301 diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index cd5b788a147..e64e93fd4a0 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -7,19 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -QUERY_FIELND_NUM=4 +QUERY_FIELD_NUM=4 -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 30" &>/dev/null & -sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELND_NUM +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 300" &>/dev/null & -# 31 is for the query to be different from the previous one -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & -sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM +while true +do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELD_NUM | grep '.' && break + sleep 0.1 +done + +# 31 is for the query to be different from the previous one +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 301" &>/dev/null & + +while true +do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query = 'SELECT sleep(1) FROM system.numbers LIMIT 301' ASYNC" | cut -f $QUERY_FIELD_NUM | grep '.' && break + sleep 0.1 +done $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 SYNC FORMAT TabSeparated" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 1 TEST" &>/dev/null - diff --git a/tests/queries/0_stateless/00508_materialized_view_to.sql b/tests/queries/0_stateless/00508_materialized_view_to.sql index 522ceb40404..0d8fb85eeeb 100644 --- a/tests/queries/0_stateless/00508_materialized_view_to.sql +++ b/tests/queries/0_stateless/00508_materialized_view_to.sql @@ -1,9 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_00508; -CREATE DATABASE test_00508; - -USE test_00508; CREATE TABLE src (x UInt8) ENGINE = Null; CREATE TABLE dst (x UInt8) ENGINE = Memory; @@ -20,17 +14,17 @@ SELECT * FROM dst ORDER BY x; USE default; -- Reattach MV (shortcut) -ATTACH TABLE test_00508.mv_00508; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.mv_00508; -INSERT INTO test_00508.src VALUES (3); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.src VALUES (3); -SELECT * FROM test_00508.mv_00508 ORDER BY x; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.mv_00508 ORDER BY x; -- Drop the MV and see if the data is still readable -DROP TABLE test_00508.mv_00508; -SELECT * FROM test_00508.dst ORDER BY x; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mv_00508; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.dst ORDER BY x; -DROP TABLE test_00508.src; -DROP TABLE test_00508.dst; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.src; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.dst; -DROP DATABASE test_00508; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00514_interval_operators.reference b/tests/queries/0_stateless/00514_interval_operators.reference index 43238eecb3d..b420e1679e0 100644 --- a/tests/queries/0_stateless/00514_interval_operators.reference +++ b/tests/queries/0_stateless/00514_interval_operators.reference @@ -37,3 +37,15 @@ 2030-02-28 01:02:03 2017-04-29 01:02:03 2031-02-28 01:02:03 2017-05-29 01:02:03 2015-11-29 01:02:03 +2000-01-15 18:56:07 DateTime +2000-01-01 12:20:34.567 DateTime64(3) +2000-01-01 12:00:01.234567 DateTime64(6) +2000-01-01 12:00:00.001234567 DateTime64(9) +1999-12-18 05:03:53 DateTime +2000-01-01 11:39:25.433 DateTime64(3) +2000-01-01 11:59:58.765433 DateTime64(6) +2000-01-01 11:59:59.998765433 DateTime64(9) +2000-01-01 11:59:48.333 DateTime64(3) +2000-01-01 11:59:48.33398 DateTime64(5) +2000-01-01 11:59:48.325 DateTime64(3) +2299-12-31 12:00:00.000000 diff --git a/tests/queries/0_stateless/00514_interval_operators.sql b/tests/queries/0_stateless/00514_interval_operators.sql index a4b6c983abf..e98e3211aaf 100644 --- a/tests/queries/0_stateless/00514_interval_operators.sql +++ b/tests/queries/0_stateless/00514_interval_operators.sql @@ -3,3 +3,23 @@ SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 HOUR + INTERVAL 1000 MINUT SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 DAY + INTERVAL number MONTH FROM system.numbers LIMIT 20; SELECT toDateTime('2016-02-29 01:02:03') + INTERVAL number YEAR, toDateTime('2016-02-29 01:02:03') + INTERVAL number MONTH FROM system.numbers LIMIT 16; SELECT toDateTime('2016-02-29 01:02:03') - INTERVAL 1 QUARTER; + +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 SECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 MICROSECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') + INTERVAL 1234567 NANOSECOND) x, toTypeName(x); + +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 SECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 MICROSECOND) x, toTypeName(x); +SELECT (toDateTime('2000-01-01 12:00:00') - INTERVAL 1234567 NANOSECOND) x, toTypeName(x); + +SELECT (toDateTime64('2000-01-01 12:00:00.678', 3) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime64('2000-01-01 12:00:00.67898', 5) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); +SELECT (toDateTime64('2000-01-01 12:00:00.67', 2) - INTERVAL 12345 MILLISECOND) x, toTypeName(x); + +select toDateTime64('3000-01-01 12:00:00.12345', 0) + interval 0 nanosecond; -- { serverError 407 } +select toDateTime64('3000-01-01 12:00:00.12345', 0) + interval 0 microsecond; + +-- Check that the error is thrown during typechecking, not execution. +select materialize(toDate('2000-01-01')) + interval 1 nanosecond from numbers(0); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00522_multidimensional.sql b/tests/queries/0_stateless/00522_multidimensional.sql index c3c41257ab9..ea9881c612a 100644 --- a/tests/queries/0_stateless/00522_multidimensional.sql +++ b/tests/queries/0_stateless/00522_multidimensional.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS multidimensional; -CREATE TABLE multidimensional ENGINE = MergeTree ORDER BY number AS SELECT number, arrayMap(x -> (x, [x], [[x]], (x, toString(x))), arrayMap(x -> range(x), range(number % 10))) AS value FROM system.numbers LIMIT 100000; +CREATE TABLE multidimensional ENGINE = MergeTree ORDER BY number SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT number, arrayMap(x -> (x, [x], [[x]], (x, toString(x))), arrayMap(x -> range(x), range(number % 10))) AS value FROM system.numbers LIMIT 100000; SELECT sum(cityHash64(toString(value))) FROM multidimensional; diff --git a/tests/queries/0_stateless/00556_array_intersect.reference b/tests/queries/0_stateless/00556_array_intersect.reference index fad9b2f507d..b9841ef42c4 100644 --- a/tests/queries/0_stateless/00556_array_intersect.reference +++ b/tests/queries/0_stateless/00556_array_intersect.reference @@ -5,7 +5,7 @@ [1] [1] [1] -[NULL,1] +[1,NULL] [1] [1] [[1,1]] diff --git a/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql b/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql index 46fc0dd586d..d24a57187b4 100644 --- a/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql +++ b/tests/queries/0_stateless/00571_non_exist_database_when_create_materializ_view.sql @@ -1,32 +1,25 @@ --- Tags: no-parallel -CREATE DATABASE test_00571; - -USE test_00571; - -DROP DATABASE IF EXISTS none; -DROP TABLE IF EXISTS test_00571; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; DROP TABLE IF EXISTS test_materialized_00571; set allow_deprecated_syntax_for_merge_tree=1; -CREATE DATABASE none; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE test_00571 ( date Date, platform Enum8('a' = 0, 'b' = 1, 'c' = 2), app Enum8('a' = 0, 'b' = 1) ) ENGINE = MergeTree(date, (platform, app), 8192); CREATE MATERIALIZED VIEW test_materialized_00571 ENGINE = MergeTree(date, (platform, app), 8192) POPULATE AS SELECT date, platform, app FROM (SELECT * FROM test_00571); -USE none; +USE {CLICKHOUSE_DATABASE_1:Identifier}; -INSERT INTO test_00571.test_00571 VALUES('2018-02-16', 'a', 'a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.test_00571 VALUES('2018-02-16', 'a', 'a'); -SELECT * FROM test_00571.test_00571; -SELECT * FROM test_00571.test_materialized_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -DETACH TABLE test_00571.test_materialized_00571; -ATTACH TABLE test_00571.test_materialized_00571; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -SELECT * FROM test_00571.test_materialized_00571; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; -DROP DATABASE IF EXISTS none; -DROP TABLE IF EXISTS test_00571.test_00571; -DROP TABLE IF EXISTS test_00571.test_materialized_00571; - -DROP DATABASE test_00571; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_00571; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_materialized_00571; diff --git a/tests/queries/0_stateless/00576_nested_and_prewhere.sql b/tests/queries/0_stateless/00576_nested_and_prewhere.sql index b15af582a19..5916e679f1e 100644 --- a/tests/queries/0_stateless/00576_nested_and_prewhere.sql +++ b/tests/queries/0_stateless/00576_nested_and_prewhere.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS nested; -CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x; +CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.numbers LIMIT 100000; ALTER TABLE nested ADD COLUMN n.b Array(UInt64); diff --git a/tests/queries/0_stateless/00604_show_create_database.reference b/tests/queries/0_stateless/00604_show_create_database.reference index c05b088280e..52fd2c48df1 100644 --- a/tests/queries/0_stateless/00604_show_create_database.reference +++ b/tests/queries/0_stateless/00604_show_create_database.reference @@ -1 +1 @@ -CREATE DATABASE test_00604\nENGINE = Atomic +CREATE DATABASE default\nENGINE = Atomic diff --git a/tests/queries/0_stateless/00604_show_create_database.sql b/tests/queries/0_stateless/00604_show_create_database.sql index c990e7abed3..23ebd23c9f1 100644 --- a/tests/queries/0_stateless/00604_show_create_database.sql +++ b/tests/queries/0_stateless/00604_show_create_database.sql @@ -1,5 +1,3 @@ --- Tags: no-ordinary-database, no-parallel +-- Tags: no-ordinary-database, no-replicated-database -create database if not exists test_00604; -show create database test_00604; -drop database test_00604; +show create database {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00612_count.sql b/tests/queries/0_stateless/00612_count.sql index 5dd9c770700..9c435bd97fe 100644 --- a/tests/queries/0_stateless/00612_count.sql +++ b/tests/queries/0_stateless/00612_count.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS count; -CREATE TABLE count (x UInt64) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE count (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO count SELECT * FROM numbers(1234567); SELECT count() FROM count; diff --git a/tests/queries/0_stateless/00612_http_max_query_size.sh b/tests/queries/0_stateless/00612_http_max_query_size.sh index cfcae330b85..6289470c21e 100755 --- a/tests/queries/0_stateless/00612_http_max_query_size.sh +++ b/tests/queries/0_stateless/00612_http_max_query_size.sh @@ -36,7 +36,7 @@ def gen_data(q): pattern = ''' or toString(number) = '{}'\n''' - for i in range(1, 4 * 1024): + for i in range(0, 1024 * 2): yield pattern.format(str(i).zfill(1024 - len(pattern) + 2)).encode() s = requests.Session() diff --git a/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql b/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql index 1802fadc57b..462fd0fef61 100644 --- a/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql +++ b/tests/queries/0_stateless/00612_http_max_query_size_for_distributed.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS data_00612; DROP TABLE IF EXISTS dist_00612; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 399511db701..334025cba28 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage, no-upgrade-check +# Tags: zookeeper, no-s3-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) @@ -11,26 +11,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - local query="$1" && shift - - local retry=0 - until [ $retry -ge 5 ] - do - local result - result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$((retry + 1)) - sleep 3 - fi - done - echo "Query '$query' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" diff --git a/tests/queries/0_stateless/00678_murmurhash.reference b/tests/queries/0_stateless/00678_murmurhash.reference index fb4a00ba046..988c022f1bf 100644 --- a/tests/queries/0_stateless/00678_murmurhash.reference +++ b/tests/queries/0_stateless/00678_murmurhash.reference @@ -25,5 +25,5 @@ 9631199822919835226 4334672815104069193 4334672815104069193 -1 -1 +6145F501578671E2877DBA2BE487AF7E +16FE7483905CCE7A85670E43E4678877 diff --git a/tests/queries/0_stateless/00678_murmurhash.sql b/tests/queries/0_stateless/00678_murmurhash.sql index eda29fd17cd..705c62480a0 100644 --- a/tests/queries/0_stateless/00678_murmurhash.sql +++ b/tests/queries/0_stateless/00678_murmurhash.sql @@ -32,7 +32,5 @@ SELECT gccMurmurHash('foo'); SELECT gccMurmurHash('\x01'); SELECT gccMurmurHash(1); --- Comparison with reverse for big endian -SELECT hex(murmurHash3_128('foo')) = hex(reverse(unhex('6145F501578671E2877DBA2BE487AF7E'))) or hex(murmurHash3_128('foo')) = '6145F501578671E2877DBA2BE487AF7E'; --- Comparison with reverse for big endian -SELECT hex(murmurHash3_128('\x01')) = hex(reverse(unhex('16FE7483905CCE7A85670E43E4678877'))) or hex(murmurHash3_128('\x01')) = '16FE7483905CCE7A85670E43E4678877'; +SELECT hex(murmurHash3_128('foo')); +SELECT hex(murmurHash3_128('\x01')); diff --git a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql index 5a169403872..c4613acf5f3 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql @@ -1,6 +1,5 @@ drop table if exists lc_dict_reading; -create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; +create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4; drop table if exists lc_dict_reading; - diff --git a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql index 3c0e64a9637..b4fe4b29200 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql @@ -8,8 +8,8 @@ select 'MergeTree'; drop table if exists lc_small_dict; drop table if exists lc_big_dict; -create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str; -create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str; +create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_small_dict select toString(number % 1000) from system.numbers limit 1000000; insert into lc_big_dict select toString(number) from system.numbers limit 1000000; @@ -25,4 +25,3 @@ select sum(toUInt64OrZero(str)) from lc_big_dict; drop table if exists lc_small_dict; drop table if exists lc_big_dict; - diff --git a/tests/queries/0_stateless/00700_decimal_compare.sql b/tests/queries/0_stateless/00700_decimal_compare.sql index 41ff8b38102..7740c75f859 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.sql +++ b/tests/queries/0_stateless/00700_decimal_compare.sql @@ -27,6 +27,7 @@ SELECT a > 0, b > 0, g > 0 FROM decimal ORDER BY a DESC; SELECT a, g > toInt8(0), g > toInt16(0), g > toInt32(0), g > toInt64(0) FROM decimal ORDER BY a; SELECT a, g > toUInt8(0), g > toUInt16(0), g > toUInt32(0), g > toUInt64(0) FROM decimal ORDER BY a; SELECT a, b, g FROM decimal WHERE a IN(42) AND b IN(42) AND g IN(42); +SELECT a, b, g FROM decimal WHERE a IN(42) AND b IN(42) AND g IN(42) SETTINGS allow_experimental_analyzer = 1; SELECT a, b, g FROM decimal WHERE a > 0 AND a <= 42 AND b <= 42 AND g <= 42; SELECT d, e, f from decimal WHERE d > 0 AND d < 1 AND e > 0 AND e < 1 AND f > 0 AND f < 1; diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index 31ea1dbbe58..004590c21df 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -11,7 +11,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" done diff --git a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql index 9c02ac795ed..a1859220c6c 100644 --- a/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql +++ b/tests/queries/0_stateless/00732_quorum_insert_lost_part_and_alive_part_zookeeper_long.sql @@ -1,4 +1,4 @@ --- Tags: long, zookeeper, no-replicated-database, no-upgrade-check +-- Tags: long, zookeeper, no-replicated-database -- Tag no-replicated-database: Fails due to additional replicas or shards SET send_logs_level = 'fatal'; diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index 9a7ae92439d..b62a639d8f4 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -13,7 +13,7 @@ uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase()) echo "DROP TABLE IF EXISTS tab_00738 SYNC; DROP TABLE IF EXISTS mv SYNC; -CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a; +CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- The matview will take at least 2 seconds to be finished (10000000 * 0.0000002) CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT sleepEachRow(0.0000002) FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n @@ -63,4 +63,3 @@ drop_inner_id wait drop_at_exit - diff --git a/tests/queries/0_stateless/00740_database_in_nested_view.sql b/tests/queries/0_stateless/00740_database_in_nested_view.sql index e2debe2859b..42c26a709b1 100644 --- a/tests/queries/0_stateless/00740_database_in_nested_view.sql +++ b/tests/queries/0_stateless/00740_database_in_nested_view.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_00740; -CREATE DATABASE test_00740; -USE test_00740; - -DROP TABLE IF EXISTS test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; DROP TABLE IF EXISTS test_view_00740; DROP TABLE IF EXISTS test_nested_view_00740; DROP TABLE IF EXISTS test_joined_view_00740; @@ -19,12 +14,10 @@ SELECT * FROM test_nested_view_00740; SELECT * FROM test_joined_view_00740; USE default; -SELECT * FROM test_00740.test_view_00740; -SELECT * FROM test_00740.test_nested_view_00740; -SELECT * FROM test_00740.test_joined_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_nested_view_00740; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_joined_view_00740; -DROP TABLE IF EXISTS test_00740.test_00740; -DROP TABLE IF EXISTS test_00740.test_view_00740; -DROP TABLE IF EXISTS test_00740.test_nested_view_00740; - -DROP DATABASE test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_view_00740; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.test_nested_view_00740; diff --git a/tests/queries/0_stateless/00746_hashing_tuples.reference b/tests/queries/0_stateless/00746_hashing_tuples.reference index e3b896f6077..71d45be5a54 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.reference +++ b/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -1,11 +1,11 @@ 12940785793559895259 17926972817233444501 7456555839952096623 -1 -1 -1 -1 -1 +CC45107CC4B79F62D831BEF2103C7CBF +DF2EC2F0669B000EDFF6ADEE264E7D68 +4CD1C30C38AB935D418B5269EF197B9E +9D78134EE48654D753CCA1B76185CF8E +389D16428D2AADEC9713905572F42864 955237314186186656 8175794665478042155 9325786087413524176 @@ -18,8 +18,8 @@ 8163029322371165472 8788309436660676487 236561483980029756 -1 -1 +8DD5527CC43D76F4760D26BE0F641F7E +F8F7AD9B6CD4CF117A71E277E2EC2931 12384823029245979431 4507350192761038840 1188926775431157506 diff --git a/tests/queries/0_stateless/00746_hashing_tuples.sql b/tests/queries/0_stateless/00746_hashing_tuples.sql index f17ad6fa77f..466a2184c65 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.sql +++ b/tests/queries/0_stateless/00746_hashing_tuples.sql @@ -4,11 +4,11 @@ SELECT sipHash64(1, 2, 3); SELECT sipHash64(1, 3, 2); SELECT sipHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); -SELECT hex(sipHash128('foo')) = hex(reverse(unhex('CC45107CC4B79F62D831BEF2103C7CBF'))) or hex(sipHash128('foo')) = 'CC45107CC4B79F62D831BEF2103C7CBF'; -SELECT hex(sipHash128('\x01')) = hex(reverse(unhex('DF2EC2F0669B000EDFF6ADEE264E7D68'))) or hex(sipHash128('\x01')) = 'DF2EC2F0669B000EDFF6ADEE264E7D68'; -SELECT hex(sipHash128('foo', 'foo')) = hex(reverse(unhex('4CD1C30C38AB935D418B5269EF197B9E'))) or hex(sipHash128('foo', 'foo')) = '4CD1C30C38AB935D418B5269EF197B9E'; -SELECT hex(sipHash128('foo', 'foo', 'foo')) = hex(reverse(unhex('9D78134EE48654D753CCA1B76185CF8E'))) or hex(sipHash128('foo', 'foo', 'foo')) = '9D78134EE48654D753CCA1B76185CF8E'; -SELECT hex(sipHash128(1, 2, 3)) = hex(reverse(unhex('389D16428D2AADEC9713905572F42864'))) or hex(sipHash128(1, 2, 3)) = '389D16428D2AADEC9713905572F42864'; +SELECT hex(sipHash128('foo')); +SELECT hex(sipHash128('\x01')); +SELECT hex(sipHash128('foo', 'foo')); +SELECT hex(sipHash128('foo', 'foo', 'foo')); +SELECT hex(sipHash128(1, 2, 3)); SELECT halfMD5(1, 2, 3); SELECT halfMD5(1, 3, 2); @@ -26,8 +26,8 @@ SELECT murmurHash3_64(1, 2, 3); SELECT murmurHash3_64(1, 3, 2); SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); -SELECT hex(murmurHash3_128('foo', 'foo')) = hex(reverse(unhex('8DD5527CC43D76F4760D26BE0F641F7E'))) or hex(murmurHash3_128('foo', 'foo')) = '8DD5527CC43D76F4760D26BE0F641F7E'; -SELECT hex(murmurHash3_128('foo', 'foo', 'foo')) = hex(reverse(unhex('F8F7AD9B6CD4CF117A71E277E2EC2931'))) or hex(murmurHash3_128('foo', 'foo', 'foo')) = 'F8F7AD9B6CD4CF117A71E277E2EC2931'; +SELECT hex(murmurHash3_128('foo', 'foo')); +SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); SELECT gccMurmurHash(1, 2, 3); SELECT gccMurmurHash(1, 3, 2); diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/tests/queries/0_stateless/00751_default_databasename_for_view.reference index b3f1875ae91..4899e230924 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test_00751.t_mv_00751 +CREATE MATERIALIZED VIEW default.t_mv_00751 ( `date` Date, `platform` Enum8('a' = 0, 'b' = 1), @@ -11,14 +11,14 @@ SELECT date, platform, app -FROM test_00751.t_00751 +FROM default.t_00751 WHERE (app = ( SELECT min(app) - FROM test_00751.u_00751 + FROM default.u_00751 )) AND (platform = ( SELECT ( SELECT min(platform) - FROM test_00751.v_00751 + FROM default.v_00751 ) )) 2000-01-01 a a diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.sql b/tests/queries/0_stateless/00751_default_databasename_for_view.sql index 9292b612d87..599ef5b89ae 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.sql +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.sql @@ -1,7 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00751; -USE test_00751; DROP TABLE IF EXISTS t_00751; DROP TABLE IF EXISTS t_mv_00751; @@ -25,25 +21,24 @@ CREATE MATERIALIZED VIEW t_mv_00751 ENGINE = MergeTree ORDER BY date AS SELECT date, platform, app FROM t_00751 WHERE app = (SELECT min(app) from u_00751) AND platform = (SELECT (SELECT min(platform) from v_00751)); -SHOW CREATE TABLE test_00751.t_mv_00751 FORMAT TabSeparatedRaw; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751 FORMAT TabSeparatedRaw; USE default; -DETACH TABLE test_00751.t_mv_00751; -ATTACH TABLE test_00751.t_mv_00751; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; -INSERT INTO test_00751.t_00751 VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.t_00751 VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); -INSERT INTO test_00751.u_00751 VALUES ('a'); -INSERT INTO test_00751.v_00751 VALUES ('a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.u_00751 VALUES ('a'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.v_00751 VALUES ('a'); -INSERT INTO test_00751.t_00751 VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.t_00751 VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); -SELECT * FROM test_00751.t_00751 ORDER BY date; -SELECT * FROM test_00751.t_mv_00751 ORDER BY date; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.t_00751 ORDER BY date; +SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751 ORDER BY date; -DROP TABLE test_00751.t_00751; -DROP TABLE test_00751.t_mv_00751; -DROP TABLE test_00751.u_00751; -DROP TABLE test_00751.v_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.t_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.t_mv_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.u_00751; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.v_00751; -DROP DATABASE test_00751; diff --git a/tests/queries/0_stateless/00800_versatile_storage_join.sql b/tests/queries/0_stateless/00800_versatile_storage_join.sql index 3690ea6bf89..cccc655e34b 100644 --- a/tests/queries/0_stateless/00800_versatile_storage_join.sql +++ b/tests/queries/0_stateless/00800_versatile_storage_join.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00800; - -USE test_00800; DROP TABLE IF EXISTS join_any_inner; DROP TABLE IF EXISTS join_any_left; @@ -54,24 +49,22 @@ SELECT joinGet('join_string_key', 'x', 'abc'), joinGet('join_string_key', 'k', ' USE default; -DROP TABLE test_00800.join_any_inner; -DROP TABLE test_00800.join_any_left; -DROP TABLE test_00800.join_any_left_null; -DROP TABLE test_00800.join_all_inner; -DROP TABLE test_00800.join_all_left; -DROP TABLE test_00800.join_string_key; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_inner; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_left; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_any_left_null; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_all_inner; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_all_left; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_string_key; -- test provided by Alexander Zaitsev -DROP TABLE IF EXISTS test_00800.join_test; -CREATE TABLE test_00800.join_test (a UInt8, b UInt8) Engine = Join(ANY, LEFT, a); +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.join_test; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.join_test (a UInt8, b UInt8) Engine = Join(ANY, LEFT, a); -USE test_00800; +USE {CLICKHOUSE_DATABASE:Identifier}; select joinGet('join_test', 'b', 1); USE system; -SELECT joinGet('test_00800.join_test', 'b', 1); +SELECT joinGet({CLICKHOUSE_DATABASE:String} || '.join_test', 'b', 1); USE default; -DROP TABLE test_00800.join_test; - -DROP DATABASE test_00800; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.join_test; diff --git a/tests/queries/0_stateless/00815_left_join_on_stepanel.sql b/tests/queries/0_stateless/00815_left_join_on_stepanel.sql index 13172f8c18d..725c9523cd7 100644 --- a/tests/queries/0_stateless/00815_left_join_on_stepanel.sql +++ b/tests/queries/0_stateless/00815_left_join_on_stepanel.sql @@ -1,7 +1,3 @@ --- Tags: no-parallel - -CREATE DATABASE IF NOT EXISTS test_00815; -USE test_00815; DROP TABLE IF EXISTS fact_cpc_clicks; DROP TABLE IF EXISTS dim_model; @@ -16,6 +12,6 @@ select f.model_id from fact_cpc_clicks as f left join dim_model as d on f.model_ USE default; -select f.model_id from test_00815.fact_cpc_clicks as f left join test_00815.dim_model as d on f.model_id=d.model_id limit 10; +select f.model_id from {CLICKHOUSE_DATABASE:Identifier}.fact_cpc_clicks as f left join {CLICKHOUSE_DATABASE:Identifier}.dim_model as d on f.model_id=d.model_id limit 10; -DROP DATABASE test_00815; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index 6714d8b35ca..cbe37de6651 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-parallel +# Tags: deadlock, no-parallel, no-debug # NOTE: database = $CLICKHOUSE_DATABASE is unwanted diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index e7f034131a2..2044a9b8d22 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,7 +1,4 @@ --- Tags: global, no-parallel -CREATE DATABASE IF NOT EXISTS test_00857; -USE test_00857; DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; @@ -27,7 +24,7 @@ INSERT INTO other_table VALUES(100, 'One Hundred', now(), 1000); INSERT INTO other_table VALUES(200, 'Two Hundred', now(), 2000); select t2.name from remote('127.0.0.2', currentDatabase(), 'local_table') as t1 -left join test_00857.other_table as t2 -- FIXME: doesn't work properly on remote without explicit database prefix +left join {CLICKHOUSE_DATABASE:Identifier}.other_table as t2 -- FIXME: doesn't work properly on remote without explicit database prefix on t1.oth_id = t2.id order by t2.name; @@ -58,4 +55,3 @@ order by other_table.name; DROP TABLE local_table; DROP TABLE other_table; -DROP DATABASE test_00857; diff --git a/tests/queries/0_stateless/00900_long_parquet_load.sh b/tests/queries/0_stateless/00900_long_parquet_load.sh index 8142c5b5810..2f19cce9107 100755 --- a/tests/queries/0_stateless/00900_long_parquet_load.sh +++ b/tests/queries/0_stateless/00900_long_parquet_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest +# Tags: long, no-fasttest, no-debug # # Load all possible .parquet files found in submodules. @@ -43,9 +43,9 @@ for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | LC_A JSON=$DATA_DIR/$NAME.json COLUMNS_FILE=$DATA_DIR/$NAME.columns - ([ -z "$PARQUET_READER" ] || [ ! -s "$PARQUET_READER" ]) && [ ! -s "$COLUMNS_FILE" ] && continue + { [ -z "$PARQUET_READER" ] || [ ! -s "$PARQUET_READER" ]; } && [ ! -s "$COLUMNS_FILE" ] && continue - echo === Try load data from "$NAME" + echo "=== Try load data from $NAME" # If you want change or add .parquet file - rm data_parquet/*.json data_parquet/*.columns [ -n "$PARQUET_READER" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$PARQUET_READER" --json "$DATA_DIR"/"$NAME" > "$JSON" @@ -64,7 +64,7 @@ CREATE TABLE parquet_load ($COLUMNS) ENGINE = Memory; EOF # Some files contain unsupported data structures, exception is ok. - cat "$DATA_DIR"/"$NAME" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" 2>&1 | sed 's/Exception/Ex---tion/' + ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" < "$DATA_DIR"/"$NAME" 2>&1 | sed 's/Exception/Ex---tion/' ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load ORDER BY tuple(*) LIMIT 100" ${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_load" diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index cd2ceabcf6d..55eacd0db44 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,5 @@ drop table if exists lc_00906; -create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b; +create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/00921_datetime64_basic.sql b/tests/queries/0_stateless/00921_datetime64_basic.sql index 28205c72ef0..13abe3e64d0 100644 --- a/tests/queries/0_stateless/00921_datetime64_basic.sql +++ b/tests/queries/0_stateless/00921_datetime64_basic.sql @@ -3,17 +3,17 @@ DROP TABLE IF EXISTS A; SELECT CAST(1 as DateTime64('abc')); -- { serverError 43 } # Invalid scale parameter type SELECT CAST(1 as DateTime64(100)); -- { serverError 69 } # too big scale SELECT CAST(1 as DateTime64(-1)); -- { serverError 43 } # signed scale parameter type -SELECT CAST(1 as DateTime64(3, 'qqq')); -- { serverError 1000 } # invalid timezone +SELECT CAST(1 as DateTime64(3, 'qqq')); -- { serverError BAD_ARGUMENTS } # invalid timezone SELECT toDateTime64('2019-09-16 19:20:11.234', 'abc'); -- { serverError 43 } # invalid scale SELECT toDateTime64('2019-09-16 19:20:11.234', 100); -- { serverError 69 } # too big scale SELECT toDateTime64(CAST([['CLb5Ph ']], 'String'), uniqHLL12('2Gs1V', 752)); -- { serverError 44 } # non-const string and non-const scale -SELECT toDateTime64('2019-09-16 19:20:11.234', 3, 'qqq'); -- { serverError 1000 } # invalid timezone +SELECT toDateTime64('2019-09-16 19:20:11.234', 3, 'qqq'); -- { serverError BAD_ARGUMENTS } # invalid timezone SELECT ignore(now64(gccMurmurHash())); -- { serverError 43 } # Illegal argument type SELECT ignore(now64('abcd')); -- { serverError 43 } # Illegal argument type SELECT ignore(now64(number)) FROM system.numbers LIMIT 10; -- { serverError 43 } # Illegal argument type -SELECT ignore(now64(3, 'invalid timezone')); -- { serverError 1000 } +SELECT ignore(now64(3, 'invalid timezone')); -- { serverError BAD_ARGUMENTS } SELECT ignore(now64(3, 1111)); -- { serverError 44 } # invalid timezone parameter type WITH 'UTC' as timezone SELECT timezone, timeZoneOf(now64(3, timezone)) == timezone; diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh index 22d9e0690b3..d06037fb836 100755 --- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh @@ -5,22 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl2" diff --git a/tests/queries/0_stateless/00942_dataparts_500.sh b/tests/queries/0_stateless/00942_dataparts_500.sh index a6c3fcd4303..91c95816590 100755 --- a/tests/queries/0_stateless/00942_dataparts_500.sh +++ b/tests/queries/0_stateless/00942_dataparts_500.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check # Test fix for issue #5066 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/00975_move_partition_merge_tree.sql b/tests/queries/0_stateless/00975_move_partition_merge_tree.sql index 2fc82b96403..c17f7c57de0 100644 --- a/tests/queries/0_stateless/00975_move_partition_merge_tree.sql +++ b/tests/queries/0_stateless/00975_move_partition_merge_tree.sql @@ -6,14 +6,14 @@ CREATE TABLE IF NOT EXISTS test_move_partition_src ( val UInt32 ) Engine = MergeTree() PARTITION BY pk - ORDER BY (pk, val); + ORDER BY (pk, val) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE IF NOT EXISTS test_move_partition_dest ( pk UInt8, val UInt32 ) Engine = MergeTree() PARTITION BY pk - ORDER BY (pk, val); + ORDER BY (pk, val) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test_move_partition_src SELECT number % 2, number FROM system.numbers LIMIT 10000000; diff --git a/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql b/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql index 48d9dedc61c..7ee38867b53 100644 --- a/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql +++ b/tests/queries/0_stateless/00981_topK_topKWeighted_long.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS topk; -CREATE TABLE topk (val1 String, val2 UInt32) ENGINE = MergeTree ORDER BY val1; +CREATE TABLE topk (val1 String, val2 UInt32) ENGINE = MergeTree ORDER BY val1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO topk WITH number % 7 = 0 AS frequent SELECT toString(frequent ? number % 10 : number), frequent ? 999999999 : number FROM numbers(4000000); diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 811681794a5..6025279e570 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -59,7 +59,8 @@ function thread6() CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, - cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50));"; + cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50)), + index_granularity = 8192, index_granularity_bytes = '10Mi';"; sleep 0.$RANDOM; done } diff --git a/tests/queries/0_stateless/01012_show_tables_limit.sql b/tests/queries/0_stateless/01012_show_tables_limit.sql index 82cde6917d0..18a11f66d08 100644 --- a/tests/queries/0_stateless/01012_show_tables_limit.sql +++ b/tests/queries/0_stateless/01012_show_tables_limit.sql @@ -1,22 +1,15 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_show_limit; - -CREATE DATABASE test_show_limit; - -CREATE TABLE test_show_limit.test1 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test2 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test3 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test4 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test5 (test UInt8) ENGINE = TinyLog; -CREATE TABLE test_show_limit.test6 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test1 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test2 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test3 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test4 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test5 (test UInt8) ENGINE = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test6 (test UInt8) ENGINE = TinyLog; SELECT '*** Should show 6: ***'; -SHOW TABLES FROM test_show_limit; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier}; SELECT '*** Should show 2: ***'; -SHOW TABLES FROM test_show_limit LIMIT 2; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIMIT 2; SELECT '*** Should show 4: ***'; -SHOW TABLES FROM test_show_limit LIMIT 2 * 2; - -DROP DATABASE test_show_limit; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIMIT 2 * 2; diff --git a/tests/queries/0_stateless/01015_attach_part.sql b/tests/queries/0_stateless/01015_attach_part.sql index a2f949d3499..9ff505efd8f 100644 --- a/tests/queries/0_stateless/01015_attach_part.sql +++ b/tests/queries/0_stateless/01015_attach_part.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel DROP TABLE IF EXISTS table_01; diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index f69302fb90f..17a77b91d45 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -15,12 +15,12 @@ dict1 Dictionary dict2 Dictionary table_for_dict MergeTree -database_for_dict dict1 ComplexKeyCache -database_for_dict dict2 Hashed +default dict1 ComplexKeyCache +default dict2 Hashed 6 6 6 6 6 -database_for_dict.dict3 6 +default.dict3 6 6 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 748c733bf9a..523b057d4e1 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -1,13 +1,9 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SET send_logs_level = 'fatal'; SET check_table_dependencies=0; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, second_column UInt8, @@ -17,9 +13,9 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -27,35 +23,35 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -SELECT second_column FROM database_for_dict.dict1 WHERE key_column = 11; -SELECT dictGetString('database_for_dict.dict1', 'third_column', toUInt64(12)); -SELECT third_column FROM database_for_dict.dict1 WHERE key_column = 12; -SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', toUInt64(14)); -SELECT fourth_column FROM database_for_dict.dict1 WHERE key_column = 14; +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); +SELECT second_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 11; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict1', 'third_column', toUInt64(12)); +SELECT third_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 12; +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'fourth_column', toUInt64(14)); +SELECT fourth_column FROM {CLICKHOUSE_DATABASE:Identifier}.dict1 WHERE key_column = 14; -SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); +SELECT count(distinct(dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(number)))) from numbers(100); -DETACH DICTIONARY database_for_dict.dict1; +DETACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -- {serverError 36} -ATTACH DICTIONARY database_for_dict.dict1; +ATTACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -DROP DICTIONARY database_for_dict.dict1; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -- {serverError 36} -- SOURCE(CLICKHOUSE(...)) uses default params if not specified -DROP DICTIONARY IF EXISTS database_for_dict.dict1; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict1; -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -63,17 +59,17 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(11)); -SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); +SELECT count(distinct(dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', toUInt64(number)))) from numbers(100); -DROP DICTIONARY database_for_dict.dict1; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, @@ -81,69 +77,68 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column, third_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1)); -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); -SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', tuple(toUInt64(14), '196')); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'fourth_column', tuple(toUInt64(14), '196')); -DETACH DICTIONARY database_for_dict.dict1; +DETACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); -- {serverError 36} +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); -- {serverError 36} -ATTACH DICTIONARY database_for_dict.dict1; +ATTACH DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1; -SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE:String} || '.dict1', 'second_column', tuple(toUInt64(11), '121')); -CREATE DICTIONARY database_for_dict.dict2 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict2 ( key_column UInt64 DEFAULT 0, some_column String EXPRESSION toString(fourth_column), fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(HASHED()); -SELECT dictGetString('database_for_dict.dict2', 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict2', 'some_column', toUInt64(12)); -SELECT name, engine FROM system.tables WHERE database = 'database_for_dict' ORDER BY name; +-- NOTE: database = currentDatabase() is not mandatory +SELECT name, engine FROM system.tables WHERE database = {CLICKHOUSE_DATABASE:String} ORDER BY name; -SELECT database, name, type FROM system.dictionaries WHERE database = 'database_for_dict' ORDER BY name; +SELECT database, name, type FROM system.dictionaries WHERE database = {CLICKHOUSE_DATABASE:String} ORDER BY name; -- check dictionary will not update -CREATE DICTIONARY database_for_dict.dict3 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict3 ( key_column UInt64 DEFAULT 0, some_column String EXPRESSION toString(fourth_column), fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(0) LAYOUT(HASHED()); -SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); -- dictGet with table name -USE database_for_dict; +USE {CLICKHOUSE_DATABASE:Identifier}; SELECT dictGetString(dict3, 'some_column', toUInt64(12)); -SELECT dictGetString(database_for_dict.dict3, 'some_column', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:Identifier}.dict3, 'some_column', toUInt64(12)); SELECT dictGetString(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36} SELECT dictGet(dict3, 'some_column', toUInt64(12)); -SELECT dictGet(database_for_dict.dict3, 'some_column', toUInt64(12)); +SELECT dictGet({CLICKHOUSE_DATABASE:Identifier}.dict3, 'some_column', toUInt64(12)); SELECT dictGet(default.dict3, 'some_column', toUInt64(12)); -- {serverError 36} USE default; -- alias should be handled correctly -SELECT 'database_for_dict.dict3' as n, dictGet(n, 'some_column', toUInt64(12)); +SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column', toUInt64(12)); -DROP TABLE database_for_dict.table_for_dict; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; SYSTEM RELOAD DICTIONARIES; -- {serverError 60} -SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); - -DROP DATABASE IF EXISTS database_for_dict; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql index 1877c289d3f..51e1eb2e12f 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql @@ -1,14 +1,10 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - SELECT '***date dict***'; -CREATE TABLE database_for_dict.date_table +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.date_table ( CountryID UInt64, StartDate Date, @@ -18,11 +14,11 @@ CREATE TABLE database_for_dict.date_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); -INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); -INSERT INTO database_for_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); -CREATE DICTIONARY database_for_dict.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict1 ( CountryID UInt64, StartDate Date, @@ -30,19 +26,19 @@ CREATE DICTIONARY database_for_dict.dict1 Tax Float64 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-15')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-29')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-29')); -SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-31')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(1), toDate('2019-05-15')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(1), toDate('2019-05-29')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(2), toDate('2019-05-29')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict1', 'Tax', toUInt64(2), toDate('2019-05-31')); SELECT '***datetime dict***'; -CREATE TABLE database_for_dict.datetime_table +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.datetime_table ( CountryID UInt64, StartDate DateTime, @@ -52,11 +48,11 @@ CREATE TABLE database_for_dict.datetime_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-05 00:00:00'), toDateTime('2019-05-20 00:00:00'), 0.33); -INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.42); -INSERT INTO database_for_dict.datetime_table VALUES(2, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.46); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(1, toDateTime('2019-05-05 00:00:00'), toDateTime('2019-05-20 00:00:00'), 0.33); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(1, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.42); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.datetime_table VALUES(2, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.46); -CREATE DICTIONARY database_for_dict.dict2 +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict2 ( CountryID UInt64, StartDate DateTime, @@ -64,19 +60,19 @@ CREATE DICTIONARY database_for_dict.dict2 Tax Float64 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'datetime_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'datetime_table' DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-15 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-29 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00')); -SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-15 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-29 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00')); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00')); SELECT '***hierarchy dict***'; -CREATE TABLE database_for_dict.table_with_hierarchy +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_with_hierarchy ( RegionID UInt64, ParentRegionID UInt64, @@ -85,24 +81,24 @@ CREATE TABLE database_for_dict.table_with_hierarchy ENGINE = MergeTree() ORDER BY RegionID; -INSERT INTO database_for_dict.table_with_hierarchy VALUES (3, 2, 'Hamovniki'), (2, 1, 'Moscow'), (1, 10000, 'Russia') (7, 10000, 'Ulan-Ude'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_with_hierarchy VALUES (3, 2, 'Hamovniki'), (2, 1, 'Moscow'), (1, 10000, 'Russia') (7, 10000, 'Ulan-Ude'); -CREATE DICTIONARY database_for_dict.dictionary_with_hierarchy +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dictionary_with_hierarchy ( RegionID UInt64, ParentRegionID UInt64 HIERARCHICAL, RegionName String ) PRIMARY KEY RegionID -SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db 'database_for_dict' table 'table_with_hierarchy')) +SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db currentDatabase() table 'table_with_hierarchy')) LAYOUT(HASHED()) LIFETIME(MIN 1 MAX 1000); -SELECT dictGetString('database_for_dict.dictionary_with_hierarchy', 'RegionName', toUInt64(2)); -SELECT dictGetHierarchy('database_for_dict.dictionary_with_hierarchy', toUInt64(3)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(3), toUInt64(2)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(7), toUInt64(10000)); -SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(1), toUInt64(5)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', 'RegionName', toUInt64(2)); +SELECT dictGetHierarchy({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(3)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(3), toUInt64(2)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(7), toUInt64(10000)); +SELECT dictIsIn({CLICKHOUSE_DATABASE:String} || '.dictionary_with_hierarchy', toUInt64(1), toUInt64(5)); -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01018_ip_dictionary_long.sql b/tests/queries/0_stateless/01018_ip_dictionary_long.sql index 647c36429cc..bb7f120163c 100644 --- a/tests/queries/0_stateless/01018_ip_dictionary_long.sql +++ b/tests/queries/0_stateless/01018_ip_dictionary_long.sql @@ -1,13 +1,9 @@ --- Tags: long, no-parallel +-- Tags: long SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - SELECT '***ipv4 trie dict***'; -CREATE TABLE database_for_dict.table_ipv4_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, asn UInt32, @@ -16,648 +12,648 @@ CREATE TABLE database_for_dict.table_ipv4_trie engine = TinyLog; -- numbers reordered to test sorting criteria too -INSERT INTO database_for_dict.table_ipv4_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie SELECT '255.255.255.255/' || toString((number + 1) * 13 % 33) AS prefix, toUInt32((number + 1) * 13 % 33) AS asn, 'NA' as cca2 FROM system.numbers LIMIT 33; -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.2', 1272, 'RU'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/8', 1270, 'RU'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.2', 1272, 'RU'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/8', 1270, 'RU'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -- non-unique entries will be squashed into one -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('101.79.55.22', 11212, 'UK'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('202.79.32.2', 11211, 'NP'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('101.79.55.22', 11212, 'UK'); -CREATE DICTIONARY database_for_dict.dict_ipv4_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, asn UInt32, cca2 String ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100); -- fuzzer -SELECT '127.0.0.0/24' = dictGetString('database_for_dict.dict_ipv4_trie', 'prefixprefixprefixprefix', tuple(IPv4StringToNumOrDefault('127.0.0.0127.0.0.0'))); -- { serverError 36 } +SELECT '127.0.0.0/24' = dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefixprefixprefixprefix', tuple(IPv4StringToNumOrDefault('127.0.0.0127.0.0.0'))); -- { serverError 36 } -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT 1 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('128.0.0.0'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('192.0.0.0'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('224.0.0.0'))); -SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT 6 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT 7 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('254.0.0.0'))); -SELECT 8 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.0.0.0'))); -SELECT 9 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.128.0.0'))); -SELECT 10 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.192.0.0'))); -SELECT 11 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.224.0.0'))); -SELECT 12 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT 14 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.254.0.0'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.0.0'))); -SELECT 17 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.128.0'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.192.0'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.224.0'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.240.0'))); -SELECT 21 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.248.0'))); -SELECT 22 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.252.0'))); -SELECT 23 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT 25 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT 26 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.192'))); -SELECT 27 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.224'))); -SELECT 28 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.240'))); -SELECT 29 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT 31 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.254'))); -SELECT 32 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT 1 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('192.0.0.0'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('224.0.0.0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT 6 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT 7 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('254.0.0.0'))); +SELECT 8 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.0.0.0'))); +SELECT 9 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.128.0.0'))); +SELECT 10 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.192.0.0'))); +SELECT 11 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.224.0.0'))); +SELECT 12 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT 14 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.254.0.0'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.0.0'))); +SELECT 17 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.128.0'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.192.0'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.224.0'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.240.0'))); +SELECT 21 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.248.0'))); +SELECT 22 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.252.0'))); +SELECT 23 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT 25 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT 26 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.192'))); +SELECT 27 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.224'))); +SELECT 28 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.240'))); +SELECT 29 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.248'))); +SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.252'))); +SELECT 31 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.254'))); +SELECT 32 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('255.255.255.255'))); -SELECT 'RU' == dictGetString('database_for_dict.dict_ipv4_trie', 'cca2', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 'RU' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'cca2', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1272 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.2'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.3'))); -SELECT 1270 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.255'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1272 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.2'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.3'))); +SELECT 1270 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('127.0.0.255'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.2'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.3'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.255'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.2'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.3'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.255'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 11211 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('202.79.32.2'))); +SELECT 11211 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(IPv4StringToNum('202.79.32.2'))); -- check that dictionary works with aliased types `IPv4` and `IPv6` -SELECT 11211 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(toIPv4('202.79.32.2'))); -SELECT 11212 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'asn', tuple(toIPv6('::ffff:101.79.55.22'))); +SELECT 11211 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(toIPv4('202.79.32.2'))); +SELECT 11212 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'asn', tuple(toIPv6('::ffff:101.79.55.22'))); -CREATE TABLE database_for_dict.table_from_ipv4_trie_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict ( prefix String, asn UInt32, cca2 String -) ENGINE = Dictionary(database_for_dict.dict_ipv4_trie); +) ENGINE = Dictionary({CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie); -SELECT 1272 == asn AND 'RU' == cca2 FROM database_for_dict.table_from_ipv4_trie_dict +SELECT 1272 == asn AND 'RU' == cca2 FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict WHERE prefix == '127.0.0.2/32'; -SELECT 37 == COUNT(*) FROM database_for_dict.table_from_ipv4_trie_dict; -SELECT 37 == COUNT(DISTINCT prefix) FROM database_for_dict.table_from_ipv4_trie_dict; +SELECT 37 == COUNT(*) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; +SELECT 37 == COUNT(DISTINCT prefix) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; -DROP TABLE IF EXISTS database_for_dict.table_from_ipv4_trie_dict; -DROP DICTIONARY IF EXISTS database_for_dict.dict_ipv4_trie; -DROP TABLE IF EXISTS database_for_dict.table_ipv4_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie; SELECT '***ipv4 trie dict mask***'; -CREATE TABLE database_for_dict.table_ipv4_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog; -INSERT INTO database_for_dict.table_ipv4_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie SELECT '255.255.255.255/' || toString(number) AS prefix, toUInt32(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 30); -CREATE DICTIONARY database_for_dict.dict_ipv4_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, val UInt32 ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('192.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('224.0.0.0'))); -SELECT 0 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('254.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.0.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.128.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.192.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.224.0.0'))); -SELECT 5 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.254.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.0.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.128.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.192.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.224.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.240.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.248.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.252.0'))); -SELECT 13 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.192'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.224'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.240'))); -SELECT 24 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); -SELECT 30 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('192.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('224.0.0.0'))); +SELECT 0 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('254.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.0.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.128.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.192.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.224.0.0'))); +SELECT 5 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.254.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.0.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.128.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.192.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.224.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.240.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.248.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.252.0'))); +SELECT 13 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.192'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.224'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.240'))); +SELECT 24 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); +SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); +SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); +SELECT 30 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); -DROP DICTIONARY IF EXISTS database_for_dict.dict_ipv4_trie; -DROP TABLE IF EXISTS database_for_dict.table_from_ipv4_trie_dict; -DROP TABLE IF EXISTS database_for_dict.table_ipv4_trie; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ipv4_trie_dict; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie; SELECT '***ipv4 trie dict pt2***'; -CREATE TABLE database_for_dict.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie ( prefix String, val UInt32 ) engine = TinyLog; -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/8', 1); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/16', 2); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.0/24', 3); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.0.1/32', 4); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.127.0/32', 5); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.128.1/32', 6); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.0/32', 7); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.1/32', 8); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.0.255.255/32', 9); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.0.0/16', 10); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.1.0', 11); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.1.255.0/24', 12); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.254.0.0/15', 13); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.254.0.127', 14); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.0.0/16', 15); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.0/24', 16); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.1/32', 17); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.10/32', 18); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.128.128/25', 19); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.128/32', 20); -INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.255/32', 21); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/8', 1); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/16', 2); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.0/24', 3); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.0.1/32', 4); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.127.0/32', 5); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.128.1/32', 6); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.0/32', 7); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.1/32', 8); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.0.255.255/32', 9); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.0.0/16', 10); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.1.0', 11); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.1.255.0/24', 12); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.254.0.0/15', 13); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.254.0.127', 14); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.0.0/16', 15); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.0/24', 16); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.1/32', 17); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.10/32', 18); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.128.128/25', 19); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.255.128/32', 20); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ipv4_trie VALUES ('127.255.255.255/32', 21); -CREATE DICTIONARY database_for_dict.dict_ipv4_trie ( prefix String, val UInt32 ) +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie ( prefix String, val UInt32 ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1)) LIFETIME(MIN 10 MAX 100); -SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT '127.0.0.1/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127'))); -SELECT '127.0.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127'))); -SELECT '127.255.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127'))); -SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9'))); -SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127'))); -SELECT '127.255.128.10/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10'))); -SELECT '127.255.128.128/25' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255'))); -SELECT '127.255.255.128/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128'))); +SELECT '127.0.0.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT '127.0.0.1/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT '127.0.0.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127'))); +SELECT '127.0.0.0/16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127'))); +SELECT '127.255.0.0/16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127'))); +SELECT '127.255.128.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9'))); +SELECT '127.255.128.0/24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127'))); +SELECT '127.255.128.10/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10'))); +SELECT '127.255.128.128/25' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255'))); +SELECT '127.255.255.128/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.255.127'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.127.127'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.9'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.127'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.10'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.255'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.255.128'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.255.127'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.127.127'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.9'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.127'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.10'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.255'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.255.128'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:0'))); -SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:1'))); -SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:7f'))); -SELECT 2 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:ff7f'))); -SELECT 15 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:7f7f'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:8009'))); -SELECT 16 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:807f'))); -SELECT 18 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:800a'))); -SELECT 19 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:80ff'))); -SELECT 20 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:ff80'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:0'))); +SELECT 4 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:1'))); +SELECT 3 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:7f'))); +SELECT 2 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:ff7f'))); +SELECT 15 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:7f7f'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:8009'))); +SELECT 16 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:807f'))); +SELECT 18 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:800a'))); +SELECT 19 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:80ff'))); +SELECT 20 == dictGetUInt32({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:ff80'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.255.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.127.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.9'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.127'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.10'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.255'))); -SELECT 1 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.255.128'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.0'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.0.127'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.0.255.127'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.127.127'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.9'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.127'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.10'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.128.255'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('127.255.255.128'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.127'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.0'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('255.127.127.0'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('255.0.0.0'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT 0 == dictHas('database_for_dict.dict_ipv4_trie', tuple(IPv4StringToNum('1.1.1.1'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.127'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('128.127.127.0'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('255.127.127.0'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('255.0.0.0'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', tuple(IPv4StringToNum('1.1.1.1'))); SELECT '***ipv6 trie dict***'; -CREATE TABLE database_for_dict.table_ip_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie ( prefix String, val String ) engine = TinyLog; -INSERT INTO database_for_dict.table_ip_trie VALUES ('101.79.55.22', 'JA'), ('127.0.0.1', 'RU'), ('2620:0:870::/48', 'US'), ('2a02:6b8:1::/48', 'UK'), ('2001:db8::/32', 'ZZ'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie VALUES ('101.79.55.22', 'JA'), ('127.0.0.1', 'RU'), ('2620:0:870::/48', 'US'), ('2a02:6b8:1::/48', 'UK'), ('2001:db8::/32', 'ZZ'); -INSERT INTO database_for_dict.table_ip_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie SELECT 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/' || toString((number + 1) * 13 % 129) AS prefix, toString((number + 1) * 13 % 129) AS val FROM system.numbers LIMIT 129; -CREATE DICTIONARY database_for_dict.dict_ip_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie ( prefix String, val String ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ip_trie')) LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1)) LIFETIME(MIN 10 MAX 100); -SELECT 'US' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::'))); -SELECT 'UK' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2a02:6b8:1::'))); -SELECT 'ZZ' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8::'))); -SELECT 'ZZ' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8:ffff::'))); +SELECT 'US' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::'))); +SELECT 'UK' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2a02:6b8:1::'))); +SELECT 'ZZ' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8::'))); +SELECT 'ZZ' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('2001:db8:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:ffff::'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:1::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:ffff::'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:1::'))); -SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); +SELECT '0' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1'))); -SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1'))); +SELECT 1 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1'))); -SELECT '2620:0:870::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::'))); -SELECT '2a02:6b8:1::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1'))); -SELECT '2001:db8::/32' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1'))); -SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT '2620:0:870::/48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::'))); +SELECT '2a02:6b8:1::/48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1'))); +SELECT '2001:db8::/32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1'))); +SELECT '::ffff:101.79.55.22/128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT '::ffff:101.79.55.22/128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); -SELECT '1' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); -SELECT '2' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); -SELECT '3' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); -SELECT '4' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); -SELECT '6' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); -SELECT '7' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); -SELECT '8' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); -SELECT '9' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); -SELECT '10' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); -SELECT '11' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); -SELECT '12' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); -SELECT '14' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); -SELECT '15' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); -SELECT '16' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); -SELECT '17' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); -SELECT '18' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '19' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '20' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '21' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '22' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '18' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '19' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '20' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '21' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '22' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '23' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); -SELECT '25' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); -SELECT '26' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); -SELECT '27' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); -SELECT '28' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); -SELECT '29' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); -SELECT '31' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); -SELECT '32' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); -SELECT '33' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); -SELECT '34' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); -SELECT '35' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); -SELECT '36' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); -SELECT '37' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); -SELECT '38' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); -SELECT '39' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); -SELECT '40' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); -SELECT '41' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); -SELECT '42' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); -SELECT '43' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); -SELECT '44' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); -SELECT '45' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); -SELECT '46' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); -SELECT '47' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); -SELECT '48' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); -SELECT '50' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); -SELECT '51' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); -SELECT '52' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); -SELECT '53' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); -SELECT '54' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); -SELECT '55' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); -SELECT '56' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); -SELECT '57' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); -SELECT '58' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); -SELECT '59' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); -SELECT '60' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); -SELECT '61' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); -SELECT '62' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); -SELECT '63' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); -SELECT '64' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); -SELECT '65' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); -SELECT '66' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); -SELECT '67' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); -SELECT '68' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); -SELECT '69' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); -SELECT '70' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); -SELECT '71' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); -SELECT '72' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); -SELECT '73' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); -SELECT '74' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '75' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '76' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); -SELECT '77' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); -SELECT '78' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); -SELECT '79' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); -SELECT '80' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); -SELECT '81' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); -SELECT '82' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); -SELECT '83' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); -SELECT '84' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); -SELECT '85' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); -SELECT '86' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); -SELECT '87' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); -SELECT '88' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); -SELECT '89' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); -SELECT '90' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '91' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '92' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); -SELECT '93' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); -SELECT '94' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); -SELECT '95' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); -SELECT '96' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); -SELECT '97' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); -SELECT '98' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); -SELECT '100' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); -SELECT '101' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); -SELECT '102' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); -SELECT '103' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); -SELECT '104' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); -SELECT '105' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); -SELECT '106' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); -SELECT '107' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); -SELECT '108' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); -SELECT '109' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); -SELECT '110' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); -SELECT '111' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); -SELECT '112' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); -SELECT '113' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); -SELECT '114' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); -SELECT '115' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); -SELECT '116' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); -SELECT '117' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); -SELECT '118' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); -SELECT '119' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); -SELECT '120' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); -SELECT '121' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); -SELECT '122' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); -SELECT '123' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); -SELECT '124' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); -SELECT '125' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); -SELECT '126' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); -SELECT '128' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); +SELECT '0' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); +SELECT '1' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); +SELECT '2' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); +SELECT '3' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); +SELECT '4' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); +SELECT '6' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); +SELECT '7' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); +SELECT '8' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); +SELECT '9' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); +SELECT '10' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); +SELECT '11' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); +SELECT '12' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); +SELECT '14' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); +SELECT '15' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); +SELECT '16' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); +SELECT '17' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); +SELECT '18' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '19' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '20' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '21' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '22' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '18' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '19' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '20' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '21' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '22' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '23' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); +SELECT '25' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); +SELECT '26' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); +SELECT '27' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); +SELECT '28' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); +SELECT '29' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); +SELECT '31' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); +SELECT '32' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); +SELECT '33' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); +SELECT '34' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); +SELECT '35' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); +SELECT '36' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); +SELECT '37' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); +SELECT '38' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); +SELECT '39' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); +SELECT '40' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); +SELECT '41' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); +SELECT '42' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); +SELECT '43' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); +SELECT '44' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); +SELECT '45' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); +SELECT '46' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); +SELECT '47' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); +SELECT '48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); +SELECT '50' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); +SELECT '51' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); +SELECT '52' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); +SELECT '53' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); +SELECT '54' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); +SELECT '55' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); +SELECT '56' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); +SELECT '57' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); +SELECT '58' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); +SELECT '59' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); +SELECT '60' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); +SELECT '61' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); +SELECT '62' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); +SELECT '63' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); +SELECT '64' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); +SELECT '65' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); +SELECT '66' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); +SELECT '67' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); +SELECT '68' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); +SELECT '69' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); +SELECT '70' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); +SELECT '71' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); +SELECT '72' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); +SELECT '73' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); +SELECT '74' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '75' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '76' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); +SELECT '77' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); +SELECT '78' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); +SELECT '79' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); +SELECT '80' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); +SELECT '81' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); +SELECT '82' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); +SELECT '83' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); +SELECT '84' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); +SELECT '85' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); +SELECT '86' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); +SELECT '87' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); +SELECT '88' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); +SELECT '89' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); +SELECT '90' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '91' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '92' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); +SELECT '93' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); +SELECT '94' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); +SELECT '95' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); +SELECT '96' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); +SELECT '97' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); +SELECT '98' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); +SELECT '100' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); +SELECT '101' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); +SELECT '102' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); +SELECT '103' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); +SELECT '104' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); +SELECT '105' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); +SELECT '106' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); +SELECT '107' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); +SELECT '108' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); +SELECT '109' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); +SELECT '110' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); +SELECT '111' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); +SELECT '112' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); +SELECT '113' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); +SELECT '114' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); +SELECT '115' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); +SELECT '116' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); +SELECT '117' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); +SELECT '118' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); +SELECT '119' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); +SELECT '120' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); +SELECT '121' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); +SELECT '122' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); +SELECT '123' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); +SELECT '124' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); +SELECT '125' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); +SELECT '126' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); +SELECT '127' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); +SELECT '128' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); -CREATE TABLE database_for_dict.table_from_ip_trie_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict ( prefix String, val String -) ENGINE = Dictionary(database_for_dict.dict_ip_trie); +) ENGINE = Dictionary({CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie); -SELECT MIN(val == 'US') FROM database_for_dict.table_from_ip_trie_dict +SELECT MIN(val == 'US') FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict WHERE prefix == '2620:0:870::/48'; -SELECT 134 == COUNT(*) FROM database_for_dict.table_from_ip_trie_dict; +SELECT 134 == COUNT(*) FROM {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict; -DROP TABLE IF EXISTS database_for_dict.table_from_ip_trie_dict; -DROP DICTIONARY IF EXISTS database_for_dict.dict_ip_trie; -DROP TABLE IF EXISTS database_for_dict.table_ip_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_from_ip_trie_dict; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie; SELECT '***ipv6 trie dict mask***'; -CREATE TABLE database_for_dict.table_ip_trie +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie ( prefix String, val String ) engine = TinyLog; -INSERT INTO database_for_dict.table_ip_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie SELECT 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/' || toString(number) AS prefix, toString(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 48, 49, 99, 127); -INSERT INTO database_for_dict.table_ip_trie VALUES ('101.79.55.22', 'JA'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie VALUES ('101.79.55.22', 'JA'); -INSERT INTO database_for_dict.table_ip_trie +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_ip_trie SELECT '255.255.255.255/' || toString(number) AS prefix, toString(number) AS val FROM VALUES ('number UInt32', 5, 13, 24, 30); -CREATE DICTIONARY database_for_dict.dict_ip_trie +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ip_trie ( prefix String, val String ) PRIMARY KEY prefix -SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie')) +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ip_trie')) LAYOUT(IP_TRIE()) LIFETIME(MIN 10 MAX 100); -SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:1:1'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('::ffff:1:1'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); -SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::'))); -SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::'))); +SELECT 0 == dictHas({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); -SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22'))); +SELECT 'JA' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); -SELECT '48' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); -SELECT '49' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); -SELECT '99' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); -SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('e000::'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f000::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('f800::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fc00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fe00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff00::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ff80::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffc0::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffe0::'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff0::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fff8::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffc::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('fffe::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:8000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:c000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:e000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f000::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:f800::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fc00::'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ff80::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffc0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffe0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fff8::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffc::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:fffe::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:8000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:c000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:e000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f000::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:f800::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fc00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fe00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff00::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ff80::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffc0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffe0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff0::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fff8::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffc::'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:fffe::'))); +SELECT '48' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:8000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:c000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:e000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f000::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:f800::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fc00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fe00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff00::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ff80::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffc0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffe0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff0::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fff8::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffc::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:fffe::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff::'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:8000:0'))); +SELECT '49' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:c000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:e000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f000:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:f800:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fc00:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff00:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ff80:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff0:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fff8:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffc:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:fffe:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:8000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:c000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:e000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f000'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:f800'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fc00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fe00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff80'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffc0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffe0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff8'))); +SELECT '99' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc'))); +SELECT '127' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe'))); +SELECT '127' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); -SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); -SELECT '5' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); -SELECT '13' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); -SELECT '24' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); -SELECT '30' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('0.0.0.0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('128.0.0.0'))); +SELECT '' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('240.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('248.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('252.0.0.0'))); +SELECT '5' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.240.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.248.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.252.0.0'))); +SELECT '13' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.254.0'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.0'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.128'))); +SELECT '24' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.248'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.252'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.254'))); +SELECT '30' == dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ip_trie', 'val', tuple(IPv4StringToNum('255.255.255.255'))); -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01021_only_tuple_columns.sql b/tests/queries/0_stateless/01021_only_tuple_columns.sql index 02db21bc0b2..d8d146f59fd 100644 --- a/tests/queries/0_stateless/01021_only_tuple_columns.sql +++ b/tests/queries/0_stateless/01021_only_tuple_columns.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel CREATE TABLE test ( diff --git a/tests/queries/0_stateless/01033_dictionaries_lifetime.sql b/tests/queries/0_stateless/01033_dictionaries_lifetime.sql index 67e1adf5574..e74ac8bde30 100644 --- a/tests/queries/0_stateless/01033_dictionaries_lifetime.sql +++ b/tests/queries/0_stateless/01033_dictionaries_lifetime.sql @@ -1,12 +1,7 @@ --- Tags: no-parallel SET send_logs_level = 'fatal'; -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, second_column UInt8, @@ -15,34 +10,34 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, 'Hello world'); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict VALUES (1, 100, 'Hello world'); -DROP DATABASE IF EXISTS ordinary_db; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DATABASE ordinary_db; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DICTIONARY ordinary_db.dict1 +CREATE DICTIONARY {CLICKHOUSE_DATABASE_1:Identifier}.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt8 DEFAULT 1, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); SELECT 'INITIALIZING DICTIONARY'; -SELECT dictGetUInt8('ordinary_db.dict1', 'second_column', toUInt64(100500)); +SELECT dictGetUInt8({CLICKHOUSE_DATABASE_1:String}||'.dict1', 'second_column', toUInt64(100500)); -SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE database='ordinary_db' AND name = 'dict1'; +SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE database={CLICKHOUSE_DATABASE_1:String} AND name = 'dict1'; -DROP DICTIONARY IF EXISTS ordinary_db.dict1; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}.dict1; -DROP DATABASE IF EXISTS ordinary_db; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -DROP TABLE IF EXISTS database_for_dict.table_for_dict; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh index e0a84323dbd..39c5742e7a7 100755 --- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh @@ -7,23 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;" diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index a81fef645db..64bafd17b2d 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -3,7 +3,7 @@ set allow_experimental_parallel_reading_from_replicas = 0; drop table if exists sample_final; -create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); +create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); select 'count'; diff --git a/tests/queries/0_stateless/01035_avg.sql b/tests/queries/0_stateless/01035_avg.sql index d683ada0aec..a3cb35a80ec 100644 --- a/tests/queries/0_stateless/01035_avg.sql +++ b/tests/queries/0_stateless/01035_avg.sql @@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS test_01035_avg ( d64 Decimal64(18) DEFAULT toDecimal64(u64 / 1000000, 8), d128 Decimal128(20) DEFAULT toDecimal128(i128 / 100000, 20), d256 Decimal256(40) DEFAULT toDecimal256(i256 / 100000, 40) -) ENGINE = MergeTree() ORDER BY i64; +) ENGINE = MergeTree() ORDER BY i64 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SELECT avg(i8), avg(i16), avg(i32), avg(i64), avg(i128), avg(i256), avg(u8), avg(u16), avg(u32), avg(u64), avg(u128), avg(u256), diff --git a/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql b/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql index ad364237544..ab0b5a243ba 100644 --- a/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql +++ b/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb; - -CREATE DATABASE dictdb; - -CREATE TABLE dictdb.table_for_dict +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict ( key_column UInt64, value Float64 @@ -12,33 +7,32 @@ CREATE TABLE dictdb.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO dictdb.table_for_dict VALUES (1, 1.1); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.table_for_dict VALUES (1, 1.1); -CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +CREATE DICTIONARY IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_exists ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(1) LAYOUT(FLAT()); -SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict_exists', 'value', toUInt64(1)); -CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +CREATE DICTIONARY IF NOT EXISTS {CLICKHOUSE_DATABASE:Identifier}.dict_exists ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(1) LAYOUT(FLAT()); -SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); +SELECT dictGetFloat64({CLICKHOUSE_DATABASE:String} || '.dict_exists', 'value', toUInt64(1)); -DROP DICTIONARY dictdb.dict_exists; -DROP TABLE dictdb.table_for_dict; -DROP DATABASE dictdb; +DROP DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_exists; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; diff --git a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql index 5d629d4e8db..4f078499972 100644 --- a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql +++ b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql @@ -1,13 +1,9 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb_01043; -CREATE DATABASE dictdb_01043; - -CREATE TABLE dictdb_01043.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO dictdb_01043.dicttbl VALUES (12, 'hello', '55:66:77'); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.dicttbl VALUES (12, 'hello', '55:66:77'); -CREATE DICTIONARY dictdb_01043.dict +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict ( key Int64 DEFAULT -1, value_default String DEFAULT 'world', @@ -15,15 +11,13 @@ CREATE DICTIONARY dictdb_01043.dict ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB 'dictdb_01043')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB currentDatabase())) LAYOUT(FLAT()) LIFETIME(1); -SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(12)); -SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(14)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_default', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_default', toUInt64(14)); -SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(12)); -SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(14)); - -DROP DATABASE IF EXISTS dictdb_01043; +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_expression', toUInt64(12)); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict', 'value_expression', toUInt64(14)); diff --git a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql index d41be7482f8..b4dbd741767 100644 --- a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql +++ b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql @@ -1,10 +1,5 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS dictdb_01045; - -CREATE DATABASE dictdb_01045; - -CREATE DICTIONARY dictdb_01045.restricted_dict ( +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.restricted_dict ( key UInt64, value String ) @@ -14,10 +9,9 @@ LIFETIME(MIN 0 MAX 1) LAYOUT(CACHE(SIZE_IN_CELLS 10)); -- because of lazy load we can check only in dictGet query -select dictGetString('dictdb_01045.restricted_dict', 'value', toUInt64(1)); -- {serverError 482} +select dictGetString({CLICKHOUSE_DATABASE:String} || '.restricted_dict', 'value', toUInt64(1)); -- {serverError 482} select 'Ok.'; -DROP DICTIONARY IF EXISTS dictdb_01045.restricted_dict; +DROP DICTIONARY IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.restricted_dict; -DROP DATABASE IF EXISTS dictdb_01045; diff --git a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh index 68c511b80ac..cd6501bbebf 100755 --- a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh +++ b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh @@ -21,7 +21,7 @@ function wait_mutation_to_start() ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_for_mutations" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_for_mutations(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k PARTITION BY modulo(k, 2)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_for_mutations(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k PARTITION BY modulo(k, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES table_for_mutations" @@ -48,7 +48,7 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_for_mutations" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS replicated_table_for_mutations" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE replicated_table_for_mutations(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/replicated_table_for_mutations', '1') ORDER BY k PARTITION BY modulo(k, 2)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE replicated_table_for_mutations(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/replicated_table_for_mutations', '1') ORDER BY k PARTITION BY modulo(k, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES replicated_table_for_mutations" diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference index 53df91c5523..bb4fb9ddb47 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference @@ -1,32 +1,32 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY b\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE default.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index bf1ac254783..e292447512c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -1,110 +1,109 @@ --- Tags: no-parallel SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; -DROP DATABASE IF EXISTS test_01047; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE test_01047 ENGINE=Ordinary; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -DROP TABLE IF EXISTS test_01047.mt; -DROP TABLE IF EXISTS test_01047.mt_2; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt_2; -CREATE TABLE test_01047.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE TABLE test_01047.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---JOIN---'; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY tumble({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS test_01047.wv; -DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; SELECT '||---JOIN---'; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE test_01047.`.inner.wv`; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count({CLICKHOUSE_DATABASE:Identifier}.mt.a), count({CLICKHOUSE_DATABASE:Identifier}.mt_2.b), wid FROM {CLICKHOUSE_DATABASE:Identifier}.mt JOIN {CLICKHOUSE_DATABASE:Identifier}.mt_2 ON {CLICKHOUSE_DATABASE:Identifier}.mt.timestamp = {CLICKHOUSE_DATABASE:Identifier}.mt_2.timestamp GROUP BY hop({CLICKHOUSE_DATABASE:Identifier}.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`.inner.wv`; -DROP TABLE test_01047.wv; -DROP TABLE test_01047.mt; -DROP TABLE test_01047.mt_2; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.mt_2; diff --git a/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql b/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql index c77ab50ab8b..2458fe14981 100644 --- a/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql +++ b/tests/queries/0_stateless/01049_zookeeper_synchronous_mutations_long.sql @@ -5,9 +5,9 @@ DROP TABLE IF EXISTS table_for_synchronous_mutations2; SELECT 'Replicated'; -CREATE TABLE table_for_synchronous_mutations1(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '1') ORDER BY k; +CREATE TABLE table_for_synchronous_mutations1(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '1') ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -CREATE TABLE table_for_synchronous_mutations2(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '2') ORDER BY k; +CREATE TABLE table_for_synchronous_mutations2(k UInt32, v1 UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_01049/table_for_synchronous_mutations', '2') ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO table_for_synchronous_mutations1 select number, number from numbers(100000); @@ -29,7 +29,7 @@ SELECT 'Normal'; DROP TABLE IF EXISTS table_for_synchronous_mutations_no_replication; -CREATE TABLE table_for_synchronous_mutations_no_replication(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k; +CREATE TABLE table_for_synchronous_mutations_no_replication(k UInt32, v1 UInt64) ENGINE MergeTree ORDER BY k SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO table_for_synchronous_mutations_no_replication select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql b/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql index ec440baf8d6..a790f384f7a 100644 --- a/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql +++ b/tests/queries/0_stateless/01050_clickhouse_dict_source_with_subquery.sql @@ -1,18 +1,17 @@ --- Tags: no-parallel -drop dictionary if exists default.test_dict_01051_d; -drop table if exists default.test_01051_d; -drop table if exists default.test_view_01051_d; +drop dictionary if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d; -create table default.test_01051_d (key UInt64, value String) engine = MergeTree order by key; -create view default.test_view_01051_d (key UInt64, value String) as select k2 + 1 as key, v2 || '_x' as value from (select key + 2 as k2, value || '_y' as v2 from default.test_01051_d); +create table {CLICKHOUSE_DATABASE:Identifier}.test_01051_d (key UInt64, value String) engine = MergeTree order by key; +create view {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d (key UInt64, value String) as select k2 + 1 as key, v2 || '_x' as value from (select key + 2 as k2, value || '_y' as v2 from test_01051_d); -insert into default.test_01051_d values (1, 'a'); +insert into {CLICKHOUSE_DATABASE:Identifier}.test_01051_d values (1, 'a'); -create dictionary default.test_dict_01051_d (key UInt64, value String) primary key key source(clickhouse(host 'localhost' port '9000' user 'default' password '' db 'default' table 'test_view_01051_d')) layout(flat()) lifetime(100500); +create dictionary {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d (key UInt64, value String) primary key key source(clickhouse(host 'localhost' port '9000' user 'default' password '' db currentDatabase() table 'test_view_01051_d')) layout(flat()) lifetime(100500); -select dictGet('default.test_dict_01051_d', 'value', toUInt64(4)); +select dictGet({CLICKHOUSE_DATABASE:String} || '.test_dict_01051_d', 'value', toUInt64(4)); -drop dictionary if exists default.test_dict_01051_d; -drop table if exists default.test_01051_d; -drop table if exists default.test_view_01051_d; +drop dictionary if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_01051_d; +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_view_01051_d; diff --git a/tests/queries/0_stateless/01053_drop_database_mat_view.sql b/tests/queries/0_stateless/01053_drop_database_mat_view.sql index 7651ac4885c..2642430eb05 100644 --- a/tests/queries/0_stateless/01053_drop_database_mat_view.sql +++ b/tests/queries/0_stateless/01053_drop_database_mat_view.sql @@ -1,14 +1,13 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS some_tests; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE some_tests ENGINE=Ordinary; -- Different inner table name with Atomic +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -- Different inner table name with Atomic set allow_deprecated_syntax_for_merge_tree=1; -create table some_tests.my_table ENGINE = MergeTree(day, (day), 8192) as select today() as day, 'mystring' as str; -show tables from some_tests; -create materialized view some_tests.my_materialized_view ENGINE = MergeTree(day, (day), 8192) as select * from some_tests.my_table; -show tables from some_tests; -select * from some_tests.my_materialized_view; +create table {CLICKHOUSE_DATABASE:Identifier}.my_table ENGINE = MergeTree(day, (day), 8192) as select today() as day, 'mystring' as str; +show tables from {CLICKHOUSE_DATABASE:Identifier}; +create materialized view {CLICKHOUSE_DATABASE:Identifier}.my_materialized_view ENGINE = MergeTree(day, (day), 8192) as select * from {CLICKHOUSE_DATABASE:Identifier}.my_table; +show tables from {CLICKHOUSE_DATABASE:Identifier}; +select * from {CLICKHOUSE_DATABASE:Identifier}.my_materialized_view; -DROP DATABASE some_tests; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh index fe757acfd06..8e28995980f 100755 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -16,14 +16,14 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(now('US/Samoa'), INTERVAL '5' SECOND, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(now('US/Samoa'), INTERVAL '10' SECOND, INTERVAL '10' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); EOF for _ in {1..100}; do $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count(*) FROM dst" | grep -q "1" && echo 'OK' && break - sleep .5 + sleep .2 done $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count FROM dst" diff --git a/tests/queries/0_stateless/01056_create_table_as.sql b/tests/queries/0_stateless/01056_create_table_as.sql index 6df660dba61..aa2dffb6e2d 100644 --- a/tests/queries/0_stateless/01056_create_table_as.sql +++ b/tests/queries/0_stateless/01056_create_table_as.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; @@ -23,9 +22,9 @@ DROP TABLE v; -- dictionary DROP DICTIONARY IF EXISTS dict; -DROP DATABASE if exists test_01056_dict_data; -CREATE DATABASE test_01056_dict_data; -CREATE TABLE test_01056_dict_data.dict_data (key Int, value UInt16) Engine=Memory(); +DROP DATABASE if exists {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.dict_data (key Int, value UInt16) Engine=Memory(); CREATE DICTIONARY dict ( `key` UInt64, @@ -34,7 +33,7 @@ CREATE DICTIONARY dict PRIMARY KEY key SOURCE(CLICKHOUSE( HOST '127.0.0.1' PORT tcpPort() - TABLE 'dict_data' DB 'test_01056_dict_data' USER 'default' PASSWORD '')) + TABLE 'dict_data' DB concat(currentDatabase(), '_1') USER 'default' PASSWORD '')) LIFETIME(MIN 0 MAX 0) LAYOUT(SPARSE_HASHED()); CREATE TABLE t3 AS dict; -- { serverError 80 } @@ -42,9 +41,9 @@ CREATE TABLE t3 AS dict; -- { serverError 80 } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t3; DROP DICTIONARY dict; -DROP TABLE test_01056_dict_data.dict_data; +DROP TABLE {CLICKHOUSE_DATABASE_1:Identifier}.dict_data; -DROP DATABASE test_01056_dict_data; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1; SELECT x, toTypeName(x) FROM t1; diff --git a/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql b/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql index bfe928d7003..7a853f32d0f 100644 --- a/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql +++ b/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel DROP TABLE IF EXISTS test; -CREATE TABLE test Engine = MergeTree ORDER BY number AS SELECT number, toString(rand()) x from numbers(10000000); +CREATE TABLE test Engine = MergeTree ORDER BY number SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT number, toString(rand()) x from numbers(10000000); SELECT count() FROM test; diff --git a/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql b/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql index 992973c97e8..f92b6779587 100644 --- a/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql +++ b/tests/queries/0_stateless/01071_prohibition_secondary_index_with_old_format_merge_tree.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE old_syntax_01071_test (date Date, id UInt8) ENGINE = MergeTree(date, id, 8192); diff --git a/tests/queries/0_stateless/01073_attach_if_not_exists.sql b/tests/queries/0_stateless/01073_attach_if_not_exists.sql index 8bd356b466c..a99d5fb5041 100644 --- a/tests/queries/0_stateless/01073_attach_if_not_exists.sql +++ b/tests/queries/0_stateless/01073_attach_if_not_exists.sql @@ -1,4 +1,3 @@ --- Tags: no-parallel CREATE TABLE aine (a Int) ENGINE = Log; ATTACH TABLE aine; -- { serverError 57 } diff --git a/tests/queries/0_stateless/01073_show_tables_not_like.sql b/tests/queries/0_stateless/01073_show_tables_not_like.sql index 405a6cb199b..9ff2afe7f27 100644 --- a/tests/queries/0_stateless/01073_show_tables_not_like.sql +++ b/tests/queries/0_stateless/01073_show_tables_not_like.sql @@ -1,11 +1,6 @@ --- Tags: no-parallel SHOW TABLES NOT LIKE '%'; -DROP DATABASE IF EXISTS test_01073; -CREATE DATABASE test_01073; -USE test_01073; - SHOW TABLES; SELECT '---'; CREATE TABLE test1 (x UInt8) ENGINE = Memory; @@ -22,14 +17,14 @@ SELECT '--'; SHOW TABLES NOT LIKE 'tes%2'; SELECT '---'; -SHOW TABLES FROM test_01073; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier}; SELECT '--'; -SHOW TABLES FROM test_01073 LIKE 'tes%'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIKE 'tes%'; SELECT '--'; -SHOW TABLES FROM test_01073 NOT LIKE 'tes%'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} NOT LIKE 'tes%'; SELECT '--'; -SHOW TABLES FROM test_01073 LIKE 'tes%1'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} LIKE 'tes%1'; SELECT '--'; -SHOW TABLES FROM test_01073 NOT LIKE 'tes%2'; +SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier} NOT LIKE 'tes%2'; -DROP DATABASE test_01073; +DROP DATABASE {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01077_mutations_index_consistency.sh b/tests/queries/0_stateless/01077_mutations_index_consistency.sh index c41eab62ecb..ffbe3692b64 100755 --- a/tests/queries/0_stateless/01077_mutations_index_consistency.sh +++ b/tests/queries/0_stateless/01077_mutations_index_consistency.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement" -$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date));" +$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" $CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Asia/Istanbul') + number%(23*3600) from numbers(1000000);" diff --git a/tests/queries/0_stateless/01079_order_by_pk.sql b/tests/queries/0_stateless/01079_order_by_pk.sql index 78e304b3118..0b442bf78c9 100644 --- a/tests/queries/0_stateless/01079_order_by_pk.sql +++ b/tests/queries/0_stateless/01079_order_by_pk.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS mt_pk; -CREATE TABLE mt_pk ENGINE = MergeTree PARTITION BY d ORDER BY x +CREATE TABLE mt_pk ENGINE = MergeTree PARTITION BY d ORDER BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT toDate(number % 32) AS d, number AS x FROM system.numbers LIMIT 10000010; SELECT x FROM mt_pk ORDER BY x ASC LIMIT 10000000, 1; diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 26c2bf133ac..bfdea95fa9e 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -15,7 +15,7 @@ done for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000" + $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192, index_granularity_bytes = '10Mi'" done $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_1 SELECT number, number + 10 from numbers(100000)" diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index e508b77a0c2..8133f866c58 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check +# Tags: zookeeper, no-parallel, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql b/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql index 1eb6e849851..d7b05bb7d78 100644 --- a/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql +++ b/tests/queries/0_stateless/01080_check_for_error_incorrect_size_of_nested_column.sql @@ -1,40 +1,33 @@ --- Tags: no-parallel --- TODO: can't just remove default prefix, it breaks the test! +drop table if exists {CLICKHOUSE_DATABASE:Identifier}.test_table_01080; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_table_01080 (dim_key Int64, dim_id String) ENGINE = MergeTree Order by (dim_key); +insert into {CLICKHOUSE_DATABASE:Identifier}.test_table_01080 values(1,'test1'); -drop database if exists db_01080; -create database db_01080; +drop DICTIONARY if exists {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080; -drop table if exists db_01080.test_table_01080; -CREATE TABLE db_01080.test_table_01080 (dim_key Int64, dim_id String) ENGINE = MergeTree Order by (dim_key); -insert into db_01080.test_table_01080 values(1,'test1'); - -drop DICTIONARY if exists db_01080.test_dict_01080; - -CREATE DICTIONARY db_01080.test_dict_01080 ( dim_key Int64, dim_id String ) +CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080 ( dim_key Int64, dim_id String ) PRIMARY KEY dim_key -source(clickhouse(host 'localhost' port tcpPort() user 'default' password '' db 'db_01080' table 'test_table_01080')) +source(clickhouse(host 'localhost' port tcpPort() user 'default' password '' db currentDatabase() table 'test_table_01080')) LIFETIME(MIN 0 MAX 0) LAYOUT(complex_key_hashed()); -SELECT dictGetString('db_01080.test_dict_01080', 'dim_id', tuple(toInt64(1))); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', tuple(toInt64(1))); -SELECT dictGetString('db_01080.test_dict_01080', 'dim_id', tuple(toInt64(0))); +SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', tuple(toInt64(0))); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(1)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(1)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(number)) as x from numbers(5)); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(number)) as x from numbers(5)); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(rand64()*0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(rand64()*0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(blockSize()=0)) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(blockSize()=0)) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(0))) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(0))) as x); -select dictGetString('db_01080.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(1))) as x); +select dictGetString({CLICKHOUSE_DATABASE:String} || '.test_dict_01080', 'dim_id', x) from (select tuple(toInt64(materialize(1))) as x); -drop DICTIONARY db_01080.test_dict_01080; -drop table db_01080.test_table_01080; -drop database db_01080; +drop DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.test_dict_01080; +drop table {CLICKHOUSE_DATABASE:Identifier}.test_table_01080; diff --git a/tests/queries/0_stateless/01084_regexp_empty.sql b/tests/queries/0_stateless/01084_regexp_empty.sql index 3ccd4af80ab..5dd060ab58c 100644 --- a/tests/queries/0_stateless/01084_regexp_empty.sql +++ b/tests/queries/0_stateless/01084_regexp_empty.sql @@ -1,10 +1,9 @@ --- Tags: no-parallel -DROP DATABASE IF EXISTS test_01084; -CREATE DATABASE test_01084; -USE test_01084; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +USE {CLICKHOUSE_DATABASE_1:Identifier}; CREATE TABLE t (x UInt8) ENGINE = Memory; SELECT * FROM merge('', ''); -DROP DATABASE test_01084; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; diff --git a/tests/queries/0_stateless/01085_window_view_attach.sql b/tests/queries/0_stateless/01085_window_view_attach.sql index 051557a6a76..51a88a04f95 100644 --- a/tests/queries/0_stateless/01085_window_view_attach.sql +++ b/tests/queries/0_stateless/01085_window_view_attach.sql @@ -1,30 +1,29 @@ --- Tags: no-parallel SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; -DROP DATABASE IF EXISTS test_01085; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; set allow_deprecated_database_ordinary=1; -CREATE DATABASE test_01085 ENGINE=Ordinary; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier} ENGINE=Ordinary; -DROP TABLE IF EXISTS test_01085.mt; -DROP TABLE IF EXISTS test_01085.wv; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.mt; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wv; -CREATE TABLE test_01085.mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM test_01085.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; -SHOW tables FROM test_01085; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -DROP TABLE test_01085.wv SYNC; -SHOW tables FROM test_01085; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv SYNC; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM test_01085.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; +CREATE WINDOW VIEW {CLICKHOUSE_DATABASE:Identifier}.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM {CLICKHOUSE_DATABASE:Identifier}.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid, market; -DETACH TABLE test_01085.wv; -SHOW tables FROM test_01085; +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -ATTACH TABLE test_01085.wv; -SHOW tables FROM test_01085; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.wv; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; -DROP TABLE test_01085.wv SYNC; -SHOW tables FROM test_01085; +DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.wv SYNC; +SHOW tables FROM {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference b/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference index 0d8a65c3869..5654471ebc1 100644 --- a/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference +++ b/tests/queries/0_stateless/01099_operators_date_and_timestamp.reference @@ -11,4 +11,8 @@ 3 Int32 2001-09-29 00:00:00 2001-09-28 00:00:00 +2001-09-29 03:25:45 DateTime +2001-09-28 20:34:15 DateTime +2001-09-29 03:25:45.000 DateTime64(3) +2001-09-28 20:34:15.000 DateTime64(3) 140400 Int32 diff --git a/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql b/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql index 8c3068cd36b..f17f062eda5 100644 --- a/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql +++ b/tests/queries/0_stateless/01099_operators_date_and_timestamp.sql @@ -13,6 +13,23 @@ select (date '2001-10-01' - date '2001-09-28') x, toTypeName(x); select timestamp '2001-09-28 01:00:00' + interval 23 hour; select timestamp '2001-09-28 23:00:00' - interval 23 hour; +select (date '2001-09-29' + interval 12345 second) x, toTypeName(x); +select (date '2001-09-29' + interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' + interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' + interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 second) x, toTypeName(x); +select (date '2001-09-29' - interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (date '2001-09-29' - interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 second) x, toTypeName(x); +select (toDate32('2001-09-29') + interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') + interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 second) x, toTypeName(x); +select (toDate32('2001-09-29') - interval 12345 millisecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 microsecond) x, toTypeName(x); -- { serverError 43 } +select (toDate32('2001-09-29') - interval 12345 nanosecond) x, toTypeName(x); -- { serverError 43 } + -- TODO: return interval select (timestamp '2001-12-29 03:00:00' - timestamp '2001-12-27 12:00:00') x, toTypeName(x); -- interval '1 day 15:00:00' diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 59899e1c14a..f61a60a0bda 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-upgrade-check +# Tags: race, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01114_database_atomic.reference b/tests/queries/0_stateless/01114_database_atomic.reference index 10a39087c57..93e89e3a2ec 100644 --- a/tests/queries/0_stateless/01114_database_atomic.reference +++ b/tests/queries/0_stateless/01114_database_atomic.reference @@ -1,3 +1,4 @@ +2 CREATE DATABASE test_01114_1\nENGINE = Atomic CREATE DATABASE test_01114_2\nENGINE = Atomic CREATE DATABASE test_01114_3\nENGINE = Ordinary diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index decbe136fc4..3e1f9eb1f43 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -13,6 +13,8 @@ DROP DATABASE IF EXISTS test_01114_2; DROP DATABASE IF EXISTS test_01114_3; " +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" + $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2" $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE test_01114_3 ENGINE=Ordinary" diff --git a/tests/queries/0_stateless/01121_remote_scalar_subquery.reference b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/01121_remote_scalar_subquery.reference +++ b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/01121_remote_scalar_subquery.sql b/tests/queries/0_stateless/01121_remote_scalar_subquery.sql index eada5ed4b59..2d0c842c5b4 100644 --- a/tests/queries/0_stateless/01121_remote_scalar_subquery.sql +++ b/tests/queries/0_stateless/01121_remote_scalar_subquery.sql @@ -1 +1,2 @@ SELECT (SELECT 1) FROM remote('127.0.0.{1,2}', system.one); +SELECT (SELECT 1) FROM remote('127.0.0.{1,2}'); diff --git a/tests/queries/0_stateless/01137_order_by_func.sql b/tests/queries/0_stateless/01137_order_by_func.sql index 682b2d391ce..536f2d1c61d 100644 --- a/tests/queries/0_stateless/01137_order_by_func.sql +++ b/tests/queries/0_stateless/01137_order_by_func.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS pk_func; -CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d); +CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(1000000); INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(1000000); @@ -10,7 +10,7 @@ SELECT * FROM pk_func ORDER BY toDate(d), ui LIMIT 5; DROP TABLE pk_func; DROP TABLE IF EXISTS nORX; -CREATE TABLE nORX (`A` Int64, `B` Int64, `V` Int64) ENGINE = MergeTree ORDER BY (A, negate(B)); +CREATE TABLE nORX (`A` Int64, `B` Int64, `V` Int64) ENGINE = MergeTree ORDER BY (A, negate(B)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO nORX SELECT 111, number, number FROM numbers(10000000); SELECT * diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 47316a6a805..3ba59f9a424 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -19,7 +19,7 @@ function run_selects() thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' || name FROM system.tables WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num") + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") for t in "${tables_arr[@]}" do diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference index 5331e30b899..32ad3f16abc 100644 --- a/tests/queries/0_stateless/01161_information_schema.reference +++ b/tests/queries/0_stateless/01161_information_schema.reference @@ -1,3 +1,7 @@ +COLUMNS +SCHEMATA +TABLES +VIEWS columns schemata tables @@ -6,6 +10,10 @@ COLUMNS SCHEMATA TABLES VIEWS +columns +schemata +tables +views INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N \N \N information_schema information_schema default \N \N \N \N default default mv VIEW @@ -25,3 +33,5 @@ default default v default v f 2 0 Float64 \N \N \N \N \N \N \N \N \N \N \N \N \ tmp tmp d 1 0 Date \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N Date tmp tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N DateTime tmp tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N \N \N \N \N \N \N \N DateTime64(3) +1 +1 diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql index ed77ef1c1c2..68a3b011ced 100644 --- a/tests/queries/0_stateless/01161_information_schema.sql +++ b/tests/queries/0_stateless/01161_information_schema.sql @@ -1,20 +1,31 @@ -show tables from information_schema; +SHOW TABLES FROM information_schema; SHOW TABLES FROM INFORMATION_SCHEMA; -create table t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) engine=Memory; -create view v (n Nullable(Int32), f Float64) as select n, f from t; -create materialized view mv engine=Null as select * from system.one; -create temporary table tmp (d Date, dt DateTime, dtms DateTime64(3)); +DROP TABLE IF EXISTS t; +DROP VIEW IF EXISTS v; +DROP VIEW IF EXISTS mv; +DROP TABLE IF EXISTS tmp; + +CREATE TABLE t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) ENGINE=Memory; +CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t; +CREATE MATERIALIZED VIEW mv ENGINE=Null AS SELECT * FROM system.one; +CREATE TEMPORARY TABLE tmp (d Date, dt DateTime, dtms DateTime64(3)); -- FIXME #28687 -select * from information_schema.schemata where schema_name ilike 'information_schema'; +SELECT * FROM information_schema.schemata WHERE schema_name ilike 'information_schema'; -- SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%'; SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%'; -select * from information_schema.views where table_schema=currentDatabase(); +SELECT * FROM information_schema.views WHERE table_schema=currentDatabase(); -- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%'; SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%'; -drop table t; -drop view v; +-- mixed upper/lowercase schema and table name: +SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't'; +SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase() AND table_name = 't'; +SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE } +SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE } + drop view mv; +drop view v; +drop table t; diff --git a/tests/queries/0_stateless/01168_mutations_isolation.reference b/tests/queries/0_stateless/01168_mutations_isolation.reference index f9ebd1c5f83..00859ce99b9 100644 --- a/tests/queries/0_stateless/01168_mutations_isolation.reference +++ b/tests/queries/0_stateless/01168_mutations_isolation.reference @@ -21,18 +21,20 @@ tx7 7 20 all_1_1_0_13 tx7 7 40 all_14_14_0 tx7 7 60 all_7_7_0_13 tx7 7 80 all_12_12_0_13 -tx7 8 20 all_1_14_2_13 -tx7 8 40 all_1_14_2_13 -tx7 8 60 all_1_14_2_13 -tx7 8 80 all_1_14_2_13 +tx7 8 20 all_1_14_1_13 +tx7 8 40 all_1_14_1_13 +tx7 8 60 all_1_14_1_13 +tx7 8 80 all_1_14_1_13 Serialization error INVALID_TRANSACTION -tx11 9 21 all_1_14_2_17 -tx11 9 41 all_1_14_2_17 -tx11 9 61 all_1_14_2_17 -tx11 9 81 all_1_14_2_17 +tx11 9 21 all_1_14_1_17 +tx11 9 41 all_1_14_1_17 +tx11 9 61 all_1_14_1_17 +tx11 9 81 all_1_14_1_17 1 1 RUNNING -tx14 10 22 all_1_14_2_18 -tx14 10 42 all_1_14_2_18 -tx14 10 62 all_1_14_2_18 -tx14 10 82 all_1_14_2_18 +tx14 10 22 all_1_14_1_18 +tx14 10 42 all_1_14_1_18 +tx14 10 62 all_1_14_1_18 +tx14 10 82 all_1_14_1_18 +11 2 all_2_2_0 +11 10 all_1_1_0_3 diff --git a/tests/queries/0_stateless/01168_mutations_isolation.sh b/tests/queries/0_stateless/01168_mutations_isolation.sh index 5d014e030f1..c1d70189673 100755 --- a/tests/queries/0_stateless/01168_mutations_isolation.sh +++ b/tests/queries/0_stateless/01168_mutations_isolation.sh @@ -53,9 +53,6 @@ tx 6 "alter table mt update n=n*10 wh tx 6 "insert into mt values (40)" tx 6 "commit" -function accept_both_parts() { - sed 's/all_1_14_1_1/all_1_14_2_1/g' -} tx 7 "begin transaction" tx 7 "select 7, n, _part from mt order by n" @@ -64,7 +61,7 @@ tx_async 8 "alter table mt update n = 0 whe $CLICKHOUSE_CLIENT -q "kill mutation where database=currentDatabase() and mutation_id='mutation_15.txt' format Null" 2>&1| grep -Fv "probably it finished" tx_sync 8 "rollback" tx 7 "optimize table mt final" -tx 7 "select 8, n, _part from mt order by n" | accept_both_parts +tx 7 "select 8, n, _part from mt order by n" tx 10 "begin transaction" tx 10 "alter table mt update n = 0 where 1" | grep -Eo "Serialization error" | uniq tx 7 "alter table mt update n=n+1 where 1" @@ -74,7 +71,7 @@ tx 7 "commit" tx_async 11 "begin transaction" -tx_async 11 "select 9, n, _part from mt order by n" | accept_both_parts +tx_async 11 "select 9, n, _part from mt order by n" tx_async 12 "begin transaction" tx_async 11 "alter table mt update n=n+1 where 1" >/dev/null tx_async 12 "alter table mt update n=n+1 where 1" >/dev/null @@ -91,6 +88,19 @@ $CLICKHOUSE_CLIENT -q "kill transaction where tid=$tid_to_kill format Null" tx_sync 13 "rollback" tx 14 "begin transaction" -tx 14 "select 10, n, _part from mt order by n" | accept_both_parts +tx 14 "select 10, n, _part from mt order by n" $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table mt" + +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by tuple()" +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q "insert into mt values (1)" + +tx 15 "begin transaction" +tx 16 "begin transaction" +tx 16 "insert into mt values (2)" +tx 15 "alter table mt update n = 10*n where 1" +tx 15 "commit" +tx 16 "commit" +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q "select 11, n, _part from mt order by n" + +$CLICKHOUSE_CLIENT -q "drop table mt" diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index f2348c29146..d4884cbf457 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -242,10 +242,10 @@ wait ||: wait_for_queries_to_finish 40 -$CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" -$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" -$CLICKHOUSE_CLIENT -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" $CLICKHOUSE_CLIENT --query "DROP TABLE src"; $CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference new file mode 100644 index 00000000000..12b941eab50 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.reference @@ -0,0 +1,8 @@ +1 1 +2 1 +3 1 +4 1 +1 +10 100 +1 1 1 +2 1 1 diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh new file mode 100755 index 00000000000..0d2016952d4 --- /dev/null +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Tags: long, no-replicated-database, no-ordinary-database + +# shellcheck disable=SC2015 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; + +function thread_insert() +{ + set -e + val=1 + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); + INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); + COMMIT;" + val=$((val+1)) + sleep 0.$RANDOM; + done +} + + +# NOTE +# ALTER PARTITION query stops merges, +# but serialization error is still possible if some merge was assigned (and committed) between BEGIN and ALTER. +function thread_partition_src_to_dst() +{ + set -e + count=0 + sum=0 + for i in {1..20}; do + out=$( + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); + INSERT INTO dst SELECT * FROM src; + ALTER TABLE src DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; + COMMIT;" 2>&1) ||: + + echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || count=$((count+1)) + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || sum=$((sum+i)) + done +} + +function thread_partition_dst_to_src() +{ + set -e + for i in {1..20}; do + action="ROLLBACK" + if (( i % 2 )); then + action="COMMIT" + fi + $CLICKHOUSE_CLIENT --multiquery --query " + SYSTEM STOP MERGES dst; + ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick + SYSTEM SYNC TRANSACTION LOG; + BEGIN TRANSACTION; + INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); + INSERT INTO src SELECT * FROM dst; + ALTER TABLE dst DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SYSTEM START MERGES dst; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null; + $action;" + done +} + +function thread_select() +{ + set -e + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + -- no duplicates + SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; + SELECT type, throwIf(count(n) != countDistinct(n)) FROM dst GROUP BY type FORMAT Null; + -- rows inserted by thread_insert moved together + SET throw_on_unsupported_query_inside_transaction=0; + SELECT _table, throwIf(arraySort(groupArrayIf(n, type=1)) != arraySort(groupArrayIf(n, type=2))) FROM merge(currentDatabase(), '') GROUP BY _table FORMAT Null; + -- all rows are inserted in insert_thread + SELECT type, throwIf(count(n) != max(n)), throwIf(sum(n) != max(n)*(max(n)+1)/2) FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type FORMAT Null; + COMMIT;" + done +} + +thread_insert & PID_1=$! +thread_select & PID_2=$! + +thread_partition_src_to_dst & PID_3=$! +thread_partition_dst_to_src & PID_4=$! +wait $PID_3 && wait $PID_4 + +kill -TERM $PID_1 +kill -TERM $PID_2 +wait +wait_for_queries_to_finish + +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT --implicit_transaction=1 --throw_on_unsupported_query_inside_transaction=0 -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" + + +$CLICKHOUSE_CLIENT --query "DROP TABLE src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index d088d747ee8..24083d7d40b 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -29,13 +29,9 @@ 4 1 Commit 1 1 1 0 5 1 Begin 1 1 1 1 5 1 AddPart 1 1 1 1 all_5_5_0 -5 1 AddPart 1 1 1 1 all_1_1_1 5 1 LockPart 1 1 1 1 all_1_1_0 -5 1 AddPart 1 1 1 1 all_3_3_1 5 1 LockPart 1 1 1 1 all_3_3_0 -5 1 AddPart 1 1 1 1 all_4_4_1 5 1 LockPart 1 1 1 1 all_4_4_0 -5 1 AddPart 1 1 1 1 all_5_5_1 5 1 LockPart 1 1 1 1 all_5_5_0 5 1 UnlockPart 1 1 1 1 all_1_1_0 5 1 UnlockPart 1 1 1 1 all_3_3_0 diff --git a/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh b/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh index 09a43d13a42..5e115e6b3af 100755 --- a/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh +++ b/tests/queries/0_stateless/01184_long_insert_values_huge_strings.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists huge_strings" -$CLICKHOUSE_CLIENT -q "create table huge_strings (n UInt64, l UInt64, s String, h UInt64) engine=MergeTree order by n" +$CLICKHOUSE_CLIENT -q "create table huge_strings (n UInt64, l UInt64, s String, h UInt64) engine=MergeTree order by n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" # Timeouts are increased, because test can be slow with sanitizers and parallel runs. diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 8074e84f0ed..e9fed1dd6b2 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-upgrade-check +-- Tags: no-parallel DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 ENGINE=Atomic; diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index 6dd7ff3cdc8..d1a7144e886 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15000000 -q "INSE sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" -$CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192_notexisting TO test_01192_renamed" 2>&1| grep -F "doesn't exist" > /dev/null && echo "ok" +$CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192_notexisting TO test_01192_renamed" 2>&1| grep -F "does not exist" > /dev/null && echo "ok" $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO test_01192_renamed" && echo "renamed" wait @@ -50,7 +50,7 @@ $CLICKHOUSE_CLIENT -q "RENAME TABLE test_01192.mt TO test_01192_atomic.mt, test_ # 6. check data after RENAME $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mt" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.rmt" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mv" 2>&1| grep -F "doesn't exist" > /dev/null && echo "ok" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01192_atomic.mv" 2>&1| grep -F "does not exist" > /dev/null && echo "ok" # 7. create dictionary and check it $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64, _part String) ENGINE=Memory" # mock diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql index 27ab0bbd838..0424a995b3f 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.sql +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS null_in; -CREATE TABLE null_in (dt DateTime, idx int, i Nullable(int), s Nullable(String)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in (dt DateTime, idx int, i Nullable(int), s Nullable(String)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in VALUES (1, 1, 1, '1') (2, 2, NULL, NULL) (3, 3, 3, '3') (4, 4, NULL, NULL) (5, 5, 5, '5'); @@ -81,7 +81,7 @@ DROP TABLE IF EXISTS null_in; DROP TABLE IF EXISTS null_in_subquery; -CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in_subquery SELECT number % 3, number, number FROM system.numbers LIMIT 99999; SELECT count() == 33333 FROM null_in_subquery WHERE i in (SELECT i FROM null_in_subquery WHERE dt = 0); @@ -111,7 +111,7 @@ DROP TABLE IF EXISTS null_in_subquery; DROP TABLE IF EXISTS null_in_tuple; -CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO null_in_tuple VALUES (1, 1, (1, '1')) (2, 2, (2, NULL)) (3, 3, (NULL, '3')) (4, 4, (NULL, NULL)) SET transform_null_in = 0; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index db0f2d8235b..a3e15f0793c 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -119,6 +119,7 @@ SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START M SYSTEM TTL MERGES ['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL MERGES','START TTL MERGES'] TABLE SYSTEM SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES'] TABLE SYSTEM SYSTEM MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM +SYSTEM PULLING REPLICATION LOG ['SYSTEM STOP PULLING REPLICATION LOG','SYSTEM START PULLING REPLICATION LOG'] TABLE SYSTEM SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index ac1186284be..c1cec6ea212 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -5,7 +5,7 @@ SELECT parseDateTime64BestEffort('foo'); -- {serverError 41} SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {serverError 43} -- invalid scale parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError 1000} -- unknown timezone +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone diff --git a/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql b/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql index 222a85094d0..1abe9bf8cd8 100644 --- a/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql +++ b/tests/queries/0_stateless/01289_min_execution_speed_not_too_early.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS ES; -create table ES(A String) Engine=MergeTree order by tuple(); +create table ES(A String) Engine=MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ES select toString(number) from numbers(10000000); SET max_execution_time = 100, diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index eb89a5ed38c..b7c30b304bf 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -106,10 +106,10 @@ CREATE USER u2_01292 DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1 CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 -- system.users -u1_01292 local directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] -u2_01292 local directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] -u3_01292 local directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] -u4_01292 local directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292'] +u1_01292 local_directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] +u2_01292 local_directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] +u3_01292 local_directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] +u4_01292 local_directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292'] -- system.settings_profile_elements \N u1_01292 \N 0 readonly 1 \N \N \N \N \N u2_01292 \N 0 \N \N \N \N \N default diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 3bfd454bb1f..25933777f9f 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -6,7 +6,7 @@ exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 set history_file $env(CLICKHOUSE_TMP)/$basename.history log_user 0 -set timeout 10 +set timeout 60 match_max 100000 expect_after { diff --git a/tests/queries/0_stateless/01293_create_role.reference b/tests/queries/0_stateless/01293_create_role.reference index 9b3c4eabd47..42f091bddd5 100644 --- a/tests/queries/0_stateless/01293_create_role.reference +++ b/tests/queries/0_stateless/01293_create_role.reference @@ -28,7 +28,7 @@ CREATE ROLE r2_01293 CREATE ROLE r1_01293 SETTINGS readonly = 1 CREATE ROLE r2_01293 SETTINGS readonly = 1 -- system.roles -r1_01293 local directory +r1_01293 local_directory -- system.settings_profile_elements \N \N r1_01293 0 readonly 1 \N \N \N \N \N \N r2_01293 0 \N \N \N \N \N default diff --git a/tests/queries/0_stateless/01293_create_role.sql b/tests/queries/0_stateless/01293_create_role.sql index f22edfeec3e..fd75d62964d 100644 --- a/tests/queries/0_stateless/01293_create_role.sql +++ b/tests/queries/0_stateless/01293_create_role.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP ROLE IF EXISTS r1_01293, r2_01293, r3_01293, r4_01293, r5_01293, r6_01293, r7_01293, r8_01293, r9_01293; DROP ROLE IF EXISTS r2_01293_renamed; DROP ROLE IF EXISTS r1_01293@'%', 'r2_01293@%.myhost.com'; diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index 9997f18f61d..f14c0d64a7d 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -8,7 +8,7 @@ create table null_01293 (key Int) engine=Null(); create table dist_01293 as null_01293 engine=Distributed(test_cluster_two_shards, currentDatabase(), null_01293, key); -- no rows, since no active monitor -select * from system.distribution_queue; +select * from system.distribution_queue where database = currentDatabase(); select 'INSERT'; system stop distributed sends dist_01293; diff --git a/tests/queries/0_stateless/01294_create_settings_profile.reference b/tests/queries/0_stateless/01294_create_settings_profile.reference index dbb73bca851..a10d5758752 100644 --- a/tests/queries/0_stateless/01294_create_settings_profile.reference +++ b/tests/queries/0_stateless/01294_create_settings_profile.reference @@ -53,12 +53,12 @@ CREATE SETTINGS PROFILE s4_01294 SETTINGS INHERIT readonly, INHERIT readonly CREATE SETTINGS PROFILE s5_01294 SETTINGS INHERIT readonly, readonly = 1 CREATE SETTINGS PROFILE s6_01294 SETTINGS INHERIT readonly, readonly CONST -- system.settings_profiles -s1_01294 local directory 0 0 [] [] -s2_01294 local directory 1 0 ['r1_01294'] [] -s3_01294 local directory 1 0 ['r1_01294'] [] -s4_01294 local directory 1 0 ['r1_01294'] [] -s5_01294 local directory 3 0 ['u1_01294'] [] -s6_01294 local directory 0 1 [] ['r1_01294','u1_01294'] +s1_01294 local_directory 0 0 [] [] +s2_01294 local_directory 1 0 ['r1_01294'] [] +s3_01294 local_directory 1 0 ['r1_01294'] [] +s4_01294 local_directory 1 0 ['r1_01294'] [] +s5_01294 local_directory 3 0 ['u1_01294'] [] +s6_01294 local_directory 0 1 [] ['r1_01294','u1_01294'] -- system.settings_profile_elements s2_01294 \N \N 0 readonly 0 \N \N \N \N s3_01294 \N \N 0 max_memory_usage 5000000 4000000 6000000 CONST \N diff --git a/tests/queries/0_stateless/01294_create_settings_profile.sql b/tests/queries/0_stateless/01294_create_settings_profile.sql index 565b4e70367..f71eefa6975 100644 --- a/tests/queries/0_stateless/01294_create_settings_profile.sql +++ b/tests/queries/0_stateless/01294_create_settings_profile.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP SETTINGS PROFILE IF EXISTS s1_01294, s2_01294, s3_01294, s4_01294, s5_01294, s6_01294, s7_01294, s8_01294, s9_01294, s10_01294; DROP SETTINGS PROFILE IF EXISTS s2_01294_renamed; DROP USER IF EXISTS u1_01294; diff --git a/tests/queries/0_stateless/01295_create_row_policy.reference b/tests/queries/0_stateless/01295_create_row_policy.reference index 6e3169b7fec..d73d9752bc1 100644 --- a/tests/queries/0_stateless/01295_create_row_policy.reference +++ b/tests/queries/0_stateless/01295_create_row_policy.reference @@ -30,6 +30,6 @@ CREATE ROW POLICY p5_01295 ON db2.table2 FOR SELECT USING a = b CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 TO ALL CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 TO ALL -- system.row_policies -p1_01295 ON db.table p1_01295 db table local directory (a < b) AND (c > d) 0 0 [] [] -p2_01295 ON db.table p2_01295 db table local directory id = currentUser() 1 0 ['u1_01295'] [] -p3_01295 ON db.table p3_01295 db table local directory 1 0 1 [] ['r1_01295'] +p1_01295 ON db.table p1_01295 db table local_directory (a < b) AND (c > d) 0 0 [] [] +p2_01295 ON db.table p2_01295 db table local_directory id = currentUser() 1 0 ['u1_01295'] [] +p3_01295 ON db.table p3_01295 db table local_directory 1 0 1 [] ['r1_01295'] diff --git a/tests/queries/0_stateless/01295_create_row_policy.sql b/tests/queries/0_stateless/01295_create_row_policy.sql index b484d0ea0f3..5ccd815c89a 100644 --- a/tests/queries/0_stateless/01295_create_row_policy.sql +++ b/tests/queries/0_stateless/01295_create_row_policy.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP ROW POLICY IF EXISTS p1_01295, p2_01295, p3_01295, p4_01295, p5_01295, p6_01295, p7_01295, p8_01295, p9_01295, p10_01295 ON db.table; DROP ROW POLICY IF EXISTS p2_01295_renamed ON db.table; DROP ROW POLICY IF EXISTS p3_01295 ON db.table, db2.table2; diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index 6b844e0d2f6..308bbf79024 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -52,10 +52,10 @@ CREATE QUOTA q2_01297 FOR INTERVAL 1 day MAX errors = 5 CREATE QUOTA q1_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 CREATE QUOTA q2_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 -- system.quotas -q1_01297 local directory ['user_name'] [] 0 ['r1_01297'] [] -q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] [] -q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] [] -q4_01297 local directory [] [604800] 1 [] ['u1_01297'] +q1_01297 local_directory ['user_name'] [] 0 ['r1_01297'] [] +q2_01297 local_directory [] [5259492] 0 ['r1_01297','u1_01297'] [] +q3_01297 local_directory ['client_key','user_name'] [5259492,15778476] 0 [] [] +q4_01297 local_directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 \N q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N \N diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index bd503f40396..97148dc268e 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01304_polygons_sym_difference.reference b/tests/queries/0_stateless/01304_polygons_sym_difference.reference index 9344410f192..828e0d5d4d4 100644 --- a/tests/queries/0_stateless/01304_polygons_sym_difference.reference +++ b/tests/queries/0_stateless/01304_polygons_sym_difference.reference @@ -1,7 +1,7 @@ [[[(1,2.9),(1,1),(2.9,1),(3,0),(0,0),(0,3),(1,2.9)]],[[(1,2.9),(1,4),(4,4),(4,1),(2.9,1),(2.6,2),(2,2.6),(1,2.9)]]] -------- MultiPolygon with Polygon -MULTIPOLYGON(((-20 -10.3067,-20 -20,-10 -20.8791,-10 -40,-40 -40,-40 -10,-20 -10.3067)),((20 10.3067,20 -20,-10 -20.8791,-10 -10,-20 -10.3067,-20 20,10 20.8791,10 10,20 10.3067)),((20 10.3067,20 20,10 20.8791,10 40,40 40,40 10,20 10.3067))) +[(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40)] -------- MultiPolygon with Polygon with Holes -MULTIPOLYGON(((-10 -20.8791,-20 -20,-20 -10.3067,-10 -10,-10 -20.8791)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067))) +[(-50,-50),(-50,50),(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40),(50,-50),(50,50)] -------- Polygon with Polygon with Holes -MULTIPOLYGON(((-20 -10.3067,-10 -10,-10 -20.8791,-20 -20,-20 -10.3067)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067))) +[(-50,-50),(-50,50),(-40,-40),(-40,-10),(-20,-20),(-20,-10.307),(-20,20),(-10,-40),(-10,-20.879),(-10,-10),(10,10),(10,20.879),(10,40),(20,-20),(20,10.307),(20,20),(40,10),(40,40),(50,-50),(50,50)] diff --git a/tests/queries/0_stateless/01304_polygons_sym_difference.sql b/tests/queries/0_stateless/01304_polygons_sym_difference.sql index c4129676b26..d0e022e1439 100644 --- a/tests/queries/0_stateless/01304_polygons_sym_difference.sql +++ b/tests/queries/0_stateless/01304_polygons_sym_difference.sql @@ -4,12 +4,12 @@ select polygonsSymDifferenceCartesian([[[(0, 0),(0, 3),(1, 2.9),(2, 2.6),(2.6, 2 select '-------- MultiPolygon with Polygon'; select wkt(polygonsSymDifferenceSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]], [[(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]]], [[[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]]))); +SELECT arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]], [[(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]]], [[[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]]))))); select '-------- MultiPolygon with Polygon with Holes'; select wkt(polygonsSymDifferenceSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50.,50.),(50.,-50.),(-50.,-50.),(-50.,50.),(50.,50.)],[(10.,10.),(10.,40.),(40.,40.),(40.,10.),(10.,10.)],[(-10.,-10.),(-10.,-40.),(-40.,-40.),(-40.,-10.),(-10.,-10.)]], [[[(-20.,-20.),(-20.,20.),(20.,20.),(20.,-20.),(-20.,-20.)]]]))); +SELECT arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[(50.,50.),(50.,-50.),(-50.,-50.),(-50.,50.),(50.,50.)],[(10.,10.),(10.,40.),(40.,40.),(40.,10.),(10.,10.)],[(-10.,-10.),(-10.,-40.),(-40.,-40.),(-40.,-10.),(-10.,-10.)]], [[[(-20.,-20.),(-20.,20.),(20.,20.),(20.,-20.),(-20.,-20.)]]]))))); select '-------- Polygon with Polygon with Holes'; select wkt(polygonsSymDifferenceSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format Null; -SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]))); +SELECT arrayDistinct(arraySort(arrayMap((x, y) -> (round(x, 3), round(y, 3)), arrayFlatten(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]))))); diff --git a/tests/queries/0_stateless/01306_benchmark_json.sh b/tests/queries/0_stateless/01306_benchmark_json.sh deleted file mode 100755 index ae361826250..00000000000 --- a/tests/queries/0_stateless/01306_benchmark_json.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -set -e - -$CLICKHOUSE_BENCHMARK --iterations 10 --json "${CLICKHOUSE_TMP}"/out.json <<< "SELECT 1" 2>/dev/null && cat "${CLICKHOUSE_TMP}"/out.json | - $CLICKHOUSE_LOCAL --input-format JSONAsString --structure "s String" --query "SELECT isValidJSON(s) FROM table" diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index f9a2ec8a34c..42941b486d6 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-upgrade-check +# Tags: long, zookeeper, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql b/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql index c5a874efe09..f1e1ab7c70f 100644 --- a/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql +++ b/tests/queries/0_stateless/01373_summing_merge_tree_exclude_partition_key.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS tt_01373; CREATE TABLE tt_01373 (a Int64, d Int64, val Int64) -ENGINE = SummingMergeTree PARTITION BY (a) ORDER BY (d); +ENGINE = SummingMergeTree PARTITION BY (a) ORDER BY (d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP MERGES tt_01373; diff --git a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql index 43c9fa43104..b6982910ace 100644 --- a/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql +++ b/tests/queries/0_stateless/01378_alter_rename_with_ttl_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-upgrade-check +-- Tags: zookeeper DROP TABLE IF EXISTS table_rename_with_ttl; diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 5321e03767f..854da04b334 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-upgrade-check +-- Tags: no-parallel DROP DATABASE IF EXISTS db_01391; CREATE DATABASE db_01391; diff --git a/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh b/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh index f92ab265eb8..dc88808d73b 100755 --- a/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh +++ b/tests/queries/0_stateless/01414_mutations_and_errors_zookeeper.sh @@ -18,19 +18,19 @@ $CLICKHOUSE_CLIENT --query " PARTITION BY date " -$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, '42' FROM numbers(4)" +$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, '42' FROM numbers(10)" -$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, 'Hello' FROM numbers(4)" +$CLICKHOUSE_CLIENT --query "INSERT INTO replicated_mutation_table SELECT toDate('2019-10-02'), number, 'Hello' FROM numbers(10)" $CLICKHOUSE_CLIENT --query "ALTER TABLE replicated_mutation_table UPDATE key = key + 1 WHERE sleepEachRow(1) == 0 SETTINGS mutations_sync = 2" 2>&1 | grep -o 'Mutation 0000000000 was killed' | head -n 1 & check_query="SELECT count() FROM system.mutations WHERE table='replicated_mutation_table' and database='$CLICKHOUSE_DATABASE' and mutation_id='0000000000'" -query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) +query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") while [ "$query_result" != "1" ] do - query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) + query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") sleep 0.1 done @@ -38,7 +38,7 @@ $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE table='replicated_mutation_table while [ "$query_result" != "0" ] do - query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) + query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") sleep 0.5 done @@ -49,11 +49,11 @@ $CLICKHOUSE_CLIENT --query "ALTER TABLE replicated_mutation_table MODIFY COLUMN check_query="SELECT type = 'UInt64' FROM system.columns WHERE table='replicated_mutation_table' and database='$CLICKHOUSE_DATABASE' and name='value'" -query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) +query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") while [ "$query_result" != "1" ] do - query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) + query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") sleep 0.5 done @@ -66,7 +66,7 @@ $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE table='replicated_mutation_table while [ "$query_result" != "0" ] do - query_result=$($CLICKHOUSE_CLIENT --query="$check_query" 2>&1) + query_result=$(curl $CLICKHOUSE_URL --silent --fail --data "$check_query") sleep 0.5 done diff --git a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql index 8febe8f2e44..4b31a86edfb 100644 --- a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql +++ b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql @@ -4,7 +4,7 @@ CREATE TABLE t_01411( str LowCardinality(String), arr Array(LowCardinality(String)) default [str] ) ENGINE = MergeTree() -ORDER BY tuple(); +ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_01411 (str) SELECT concat('asdf', toString(number % 10000)) FROM numbers(1000000); @@ -24,7 +24,7 @@ CREATE TABLE t_01411_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = MergeTree() -ORDER BY tuple(); +ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(1000000); diff --git a/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference b/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference deleted file mode 100644 index 7a123a39978..00000000000 --- a/tests/queries/0_stateless/01460_DistributedFilesToInsert.reference +++ /dev/null @@ -1,12 +0,0 @@ -INSERT -0 -0 -STOP/START DISTRIBUTED SENDS -1 -0 -FLUSH DISTRIBUTED -1 -0 -DROP TABLE -1 -0 diff --git a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql b/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql deleted file mode 100644 index 9b7a98f30bc..00000000000 --- a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql +++ /dev/null @@ -1,45 +0,0 @@ --- Tags: no-parallel, no-fasttest --- Tag no-fasttest: Look at DistributedFilesToInsert, so cannot run in parallel. - --- otherwise SYSTEM STOP DISTRIBUTED SENDS does not makes any effect (for localhost) --- (i.e. no .bin files and hence no sending is required) -set prefer_localhost_replica=0; - -drop table if exists data_01460; -drop table if exists dist_01460; - -create table data_01460 as system.one engine=Null(); -create table dist_01460 as data_01460 engine=Distributed(test_shard_localhost, currentDatabase(), data_01460) settings monitor_sleep_time_ms=50; - -select 'INSERT'; -select value from system.metrics where metric = 'DistributedFilesToInsert'; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'STOP/START DISTRIBUTED SENDS'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -system start distributed sends dist_01460; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'FLUSH DISTRIBUTED'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -system flush distributed dist_01460; -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -select 'DROP TABLE'; -system stop distributed sends dist_01460; -insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- monitor_sleep_time_ms -select value from system.metrics where metric = 'DistributedFilesToInsert'; -drop table dist_01460; -select value from system.metrics where metric = 'DistributedFilesToInsert'; - -drop table data_01460; diff --git a/tests/queries/0_stateless/01472_obfuscator_uuid.sh b/tests/queries/0_stateless/01472_obfuscator_uuid.sh index 6654dcaad71..eae9c1e3081 100755 --- a/tests/queries/0_stateless/01472_obfuscator_uuid.sh +++ b/tests/queries/0_stateless/01472_obfuscator_uuid.sh @@ -4,9 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_uuid" $CLICKHOUSE_CLIENT --query="CREATE TABLE t_uuid(Id UUID) ENGINE=MergeTree ORDER BY (Id)" $CLICKHOUSE_CLIENT --query="INSERT INTO t_uuid VALUES ('3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa'),('4bd62524-e33c-43e5-882d-f1d96cf5561e'),('7a8b45d2-c18b-4e8c-89eb-abf5bee88931'),('45bb7333-965b-4526-870e-4f941edb025b'),('a4e72d0e-f9fa-465e-8d9d-151b9ced94df'),('cb5818ab-83b5-48a8-94b0-5177e30176d9'),('701e8006-fc9f-4496-80ba-efa6817b917b'),('e0936acf-6e8f-42aa-8f56-d1363476eece'),('239bb790-5293-40df-92ae-472294b6e178'),('508d0e80-729f-4e3b-9336-4c5c8792f6be'),('94abef70-f2d6-4f7b-ad60-3889409f1dac'),('b6f1ec08-8473-4fa2-b134-73db040b0d82'),('7e54dcae-0bb4-4c4f-a636-54a705fb8b40'),('d1d258c2-a35f-4c00-abfa-8addbcbc5471'),('7c74fbd8-bf79-46ee-adfe-96271040a4f7'),('41e3a274-eea9-41d8-a128-de5a6658fcfd'),('a72dc048-f72f-470e-b0f9-60cfad6e1157'),('40634f4f-37bf-44e4-ac7c-6f024ad19990')" -$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv +$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid ORDER BY (Id) FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv echo FROM RAW DATA && cat "${CLICKHOUSE_TMP}"/data.tsv echo TRANSFORMED TO && $CLICKHOUSE_OBFUSCATOR --structure "Id UUID" --input-format TSV --output-format TSV --seed dsrub < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index f3f6cc05058..5c8c47c9127 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index e3bc8cf6e72..3ce1c3aa131 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -4,7 +4,7 @@ SET allow_asynchronous_read_from_io_pool_for_merge_tree = 0; SET do_not_merge_across_partitions_select_final = 1; SET max_threads = 16; -CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); diff --git a/tests/queries/0_stateless/01550_create_map_type.sql b/tests/queries/0_stateless/01550_create_map_type.sql index 26bbf3c7dde..92362f5596b 100644 --- a/tests/queries/0_stateless/01550_create_map_type.sql +++ b/tests/queries/0_stateless/01550_create_map_type.sql @@ -9,14 +9,14 @@ drop table if exists table_map; drop table if exists table_map; -create table table_map (a Map(String, UInt64)) engine = MergeTree() order by a; +create table table_map (a Map(String, UInt64)) engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map select map('key1', number, 'key2', number * 2) from numbers(1111, 3); select a['key1'], a['key2'] from table_map; drop table if exists table_map; -- MergeTree Engine drop table if exists table_map; -create table table_map (a Map(String, String), b String) engine = MergeTree() order by a; +create table table_map (a Map(String, String), b String) engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ({'name':'zhangsan', 'gender':'male'}, 'name'), ({'name':'lisi', 'gender':'female'}, 'gender'); select a[b] from table_map; select b from table_map where a = map('name','lisi', 'gender', 'female'); @@ -24,21 +24,21 @@ drop table if exists table_map; -- Big Integer type -create table table_map (d DATE, m Map(Int8, UInt256)) ENGINE = MergeTree() order by d; +create table table_map (d DATE, m Map(Int8, UInt256)) ENGINE = MergeTree() order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ('2020-01-01', map(1, 0, 2, 1)); select * from table_map; drop table table_map; -- Integer type -create table table_map (d DATE, m Map(Int8, Int8)) ENGINE = MergeTree() order by d; +create table table_map (d DATE, m Map(Int8, Int8)) ENGINE = MergeTree() order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values ('2020-01-01', map(1, 0, 2, -1)); select * from table_map; drop table table_map; -- Unsigned Int type drop table if exists table_map; -create table table_map(a Map(UInt8, UInt64), b UInt8) Engine = MergeTree() order by b; +create table table_map(a Map(UInt8, UInt64), b UInt8) Engine = MergeTree() order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map select map(number, number+5), number from numbers(1111,4); select a[b] from table_map; drop table if exists table_map; @@ -46,7 +46,7 @@ drop table if exists table_map; -- Array Type drop table if exists table_map; -create table table_map(a Map(String, Array(UInt8))) Engine = MergeTree() order by a; +create table table_map(a Map(String, Array(UInt8))) Engine = MergeTree() order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into table_map values(map('k1', [1,2,3], 'k2', [4,5,6])), (map('k0', [], 'k1', [100,20,90])); insert into table_map select map('k1', [number, number + 2, number * 2]) from numbers(6); insert into table_map select map('k2', [number, number + 2, number * 2]) from numbers(6); @@ -56,7 +56,7 @@ drop table if exists table_map; SELECT CAST(([1, 2, 3], ['1', '2', 'foo']), 'Map(UInt8, String)') AS map, map[1]; CREATE TABLE table_map (n UInt32, m Map(String, Int)) -ENGINE = MergeTree ORDER BY n SETTINGS min_bytes_for_wide_part = 0; +ENGINE = MergeTree ORDER BY n SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; -- coversion from Tuple(Array(K), Array(V)) INSERT INTO table_map SELECT number, (arrayMap(x -> toString(x), range(number % 10 + 2)), range(number % 10 + 2)) FROM numbers(100000); @@ -67,7 +67,7 @@ SELECT sum(m['1']), sum(m['7']), sum(m['100']) FROM table_map; DROP TABLE IF EXISTS table_map; CREATE TABLE table_map (n UInt32, m Map(String, Int)) -ENGINE = MergeTree ORDER BY n; +ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- coversion from Tuple(Array(K), Array(V)) INSERT INTO table_map SELECT number, (arrayMap(x -> toString(x), range(number % 10 + 2)), range(number % 10 + 2)) FROM numbers(100000); diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index 61083c3ae14..472e042a18b 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- force data path with the user/pass in it set use_compact_format_in_distributed_parts_names=0; diff --git a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh index 4a9b4beee5b..fcea7f57cd3 100755 --- a/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh +++ b/tests/queries/0_stateless/01576_alter_low_cardinality_and_select.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 8939ea1111d..ce9c6f4589e 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1193,10 +1193,10 @@ select count() over () from numbers(4) where number < 2; 2 -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; 1 1 6 6 diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3c9c1f9cea7..07e323b3c40 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -474,10 +474,10 @@ select count() over () from numbers(4) where number < 2; -- floating point RANGE frame select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. following) + count(*) over (order by toFloat32(number) range 5 preceding), + count(*) over (order by toFloat64(number) range 5 preceding), + count(*) over (order by toFloat32(number) range between current row and 5 following), + count(*) over (order by toFloat64(number) range between current row and 5 following) from numbers(7) ; diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index 4911b7aa792..c63c651fb0b 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -7,7 +7,7 @@ set max_insert_threads = 4; create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) Engine = MergeTree partition by toYYYYMM(dt) -order by (brand_id, toStartOfHour(dt)); +order by (brand_id, toStartOfHour(dt)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into stack select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, diff --git a/tests/queries/0_stateless/01593_insert_settings.sql b/tests/queries/0_stateless/01593_insert_settings.sql index 88a58b2152e..6493ecf649b 100644 --- a/tests/queries/0_stateless/01593_insert_settings.sql +++ b/tests/queries/0_stateless/01593_insert_settings.sql @@ -3,6 +3,8 @@ create table data_01593 (key Int) engine=MergeTree() order by key partition by k insert into data_01593 select * from numbers_mt(10); insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; -- { serverError TOO_MANY_PARTS } +-- throw_on_max_partitions_per_insert_block=false means we'll just log that the limit was reached rather than throw +insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1, throw_on_max_partitions_per_insert_block=false; -- settings for INSERT is prefered insert into data_01593 settings max_partitions_per_insert_block=100 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; diff --git a/tests/queries/0_stateless/01600_detach_permanently.sh b/tests/queries/0_stateless/01600_detach_permanently.sh index c32a255448e..4b270265cdc 100755 --- a/tests/queries/0_stateless/01600_detach_permanently.sh +++ b/tests/queries/0_stateless/01600_detach_permanently.sh @@ -108,7 +108,7 @@ clickhouse_local "INSERT INTO db_ordinary.src SELECT * FROM numbers(10)" clickhouse_local "SELECT if(count() = 10, 'MV is working', 'MV failed') FROM db_ordinary.src_mv_with_inner" clickhouse_local "DETACH VIEW db_ordinary.src_mv_with_inner PERMANENTLY; INSERT INTO db_ordinary.src SELECT * FROM numbers(10)" --stacktrace -clickhouse_local "SELECT if(count() = 10, 'MV can be detached permanently', 'MV detach failed') FROM db_ordinary.src_mv_with_inner" 2>&1 | grep -c "db_ordinary.src_mv_with_inner doesn't exist" +clickhouse_local "SELECT if(count() = 10, 'MV can be detached permanently', 'MV detach failed') FROM db_ordinary.src_mv_with_inner" 2>&1 | grep -c "db_ordinary.src_mv_with_inner does not exist" ## Quite silly: ATTACH MATERIALIZED VIEW don't work with short syntax (w/o select), but i can attach it using ATTACH TABLE ... clickhouse_local "ATTACH TABLE db_ordinary.src_mv_with_inner" clickhouse_local "INSERT INTO db_ordinary.src SELECT * FROM numbers(10)" diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.reference b/tests/queries/0_stateless/01600_parts_types_metrics_long.reference index 98fb6a68656..e8183f05f5d 100644 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.reference +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.reference @@ -1,4 +1,3 @@ 1 1 1 -1 diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh index 5f724e81042..f4a17152f84 100755 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh @@ -10,14 +10,14 @@ set -o pipefail # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT - (SELECT sumIf(value, metric = 'PartsInMemory'), sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) = - (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') - FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))" + (SELECT sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) = + (SELECT countIf(part_type = 'Compact'), countIf(part_type = 'Wide') + FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition (especially in fasttest that runs tests in parallel). # -# But it should get expected result eventually. +# But it should get the expected result eventually. # In case of test failure, this code will do infinite loop and timeout. verify() { @@ -32,21 +32,16 @@ verify() } $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS data_01600" -# InMemory - [0..5] # Compact - (5..10] # Wide - >10 -$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=10" - -# InMemory -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'InMemory', number FROM system.numbers LIMIT 1" -verify +$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=10, index_granularity = 8192, index_granularity_bytes = '10Mi'" # Compact -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Compact', number FROM system.numbers LIMIT 6 OFFSET 1" +$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Compact', number FROM system.numbers LIMIT 6" verify # Wide -$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Wide', number FROM system.numbers LIMIT 11 OFFSET 7" +$CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'Wide', number FROM system.numbers LIMIT 11 OFFSET 6" verify # DROP and check diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 0c8f80f2b63..1cf52c0288b 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,9 +1,11 @@ -- Tags: no-tsan -- Tag no-tsan: Too long for TSan +set enable_filesystem_cache=0; +set enable_filesystem_cache_on_write_operations=0; drop table if exists t; -create table t (x UInt64, s String) engine = MergeTree order by x; +create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 4623c456475..f12a61055c4 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,5 @@ --- Tags: no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug +-- no-tsan: too slow SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql b/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql index 0f2fbcaa76d..de3b79eec76 100644 --- a/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql +++ b/tests/queries/0_stateless/01606_merge_from_wide_to_compact.sql @@ -5,7 +5,8 @@ CREATE TABLE wide_to_comp (a Int, b Int, c Int) settings vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_wide_part = 0, - min_rows_for_wide_part = 0; + min_rows_for_wide_part = 0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP merges wide_to_comp; diff --git a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql index b45a1974611..f4afcb8d55e 100644 --- a/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql +++ b/tests/queries/0_stateless/01650_fetch_patition_with_macro_in_zk_path_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-upgrade-check +-- Tags: long DROP TABLE IF EXISTS test_01640; DROP TABLE IF EXISTS restore_01640; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index be42a656c66..54ca55d2068 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -168,19 +168,23 @@ Filter 3 > one condition of filter is pushed down before LEFT JOIN Join +Join Filter column: notEquals(number, 1) Join > (analyzer) one condition of filter is pushed down before LEFT JOIN Join +Join Filter column: notEquals(number_0, 1_UInt8) 0 0 3 3 > one condition of filter is pushed down before INNER JOIN Join +Join Filter column: notEquals(number, 1) Join > (analyzer) one condition of filter is pushed down before INNER JOIN Join +Join Filter column: notEquals(number_0, 1_UInt8) 3 3 > filter is pushed down before UNION diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference index 76de47c4a46..4cf61460252 100644 --- a/tests/queries/0_stateless/01683_text_log_deadlock.reference +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -1 +1 @@ -queries 25000 +queries: 25000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh index debb7a0017f..af7f348a6a2 100755 --- a/tests/queries/0_stateless/01683_text_log_deadlock.sh +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 25000' +$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 25000' diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference index 682652152dc..de5a62159ef 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference @@ -13,6 +13,7 @@ SELECT covarPop(1, 1), covarSamp(1, 1), currentDatabase(), + currentDatabase(), dateDiff('DAY', toDate('2020-10-24'), toDate('2019-10-24')), exp(1), arrayFlatten([[1]]), diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql index 9b35087182c..dda2e045e76 100644 --- a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql @@ -1 +1 @@ -EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; +EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw; diff --git a/tests/queries/0_stateless/01710_normal_projection_format.reference b/tests/queries/0_stateless/01710_normal_projection_format.reference new file mode 100644 index 00000000000..318c7253724 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_format.reference @@ -0,0 +1 @@ +CREATE TABLE default.test\n(\n `uuid` FixedString(16),\n `id` Int32,\n `ns` FixedString(16),\n `dt` DateTime64(6),\n PROJECTION mtlog_proj_source_reference\n (\n SELECT *\n ORDER BY substring(ns, 1, 5)\n )\n)\nENGINE = MergeTree\nORDER BY (id, dt, uuid)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01710_normal_projection_format.sql b/tests/queries/0_stateless/01710_normal_projection_format.sql new file mode 100644 index 00000000000..6210be166b3 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_format.sql @@ -0,0 +1,17 @@ +DROP TABLE if exists test; + +CREATE TABLE test +( + uuid FixedString(16), + id int, + ns FixedString(16), + dt DateTime64(6), +) +ENGINE = MergeTree +ORDER BY (id, dt, uuid); + +ALTER TABLE test ADD PROJECTION mtlog_proj_source_reference (SELECT * ORDER BY substring(ns, 1, 5)); + +SHOW CREATE test; + +drop table test; diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 766dff8c7b0..21673bf698b 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -23,12 +23,12 @@ select * from tbl WHERE indexHint(p in (select toInt64(number) - 2 from numbers( 0 3 0 drop table tbl; drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); 487680 drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; 100000 diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index 77c2760535d..cde46a5a2bf 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -18,7 +18,7 @@ drop table tbl; drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); @@ -26,7 +26,7 @@ SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); drop table if exists XXXX; -create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; +create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192, index_granularity_bytes = '10Mi'; insert into XXXX select number*60, 0 from numbers(100000); diff --git a/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql b/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql index 72ba6a036df..585640665d1 100644 --- a/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql +++ b/tests/queries/0_stateless/01746_test_for_tupleElement_must_be_constant_issue.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS ttt01746; -CREATE TABLE ttt01746 (d Date, n UInt64) ENGINE = MergeTree() PARTITION BY toMonday(d) ORDER BY n; +CREATE TABLE ttt01746 (d Date, n UInt64) ENGINE = MergeTree() PARTITION BY toMonday(d) ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO ttt01746 SELECT toDate('2021-02-14') + (number % 30) AS d, number AS n FROM numbers(1500000); set optimize_move_to_prewhere=0; SELECT arraySort(x -> x.2, [tuple('a', 10)]) AS X FROM ttt01746 WHERE d >= toDate('2021-03-03') - 2 ORDER BY n LIMIT 1; diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh index c6e93f4abd7..6bd58facb51 100755 --- a/tests/queries/0_stateless/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -33,8 +33,10 @@ set -eu # Since there is no way to cleanup system.session_log table, # make sure that we can identify log entries from this test by a random user name. -readonly BASE_USERNAME="session_log_test_user_$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 32)" -readonly TMP_QUERY_FILE=$(mktemp /tmp/tmp_query.log.XXXXXX) +BASE_USERNAME="session_log_test_user_$(tr -cd 'a-f0-9' < /dev/urandom | head -c 32)" +readonly BASE_USERNAME +TMP_QUERY_FILE=$(mktemp /tmp/tmp_query.log.XXXXXX) +readonly TMP_QUERY_FILE declare -a ALL_USERNAMES ALL_USERNAMES+=("${BASE_USERNAME}") @@ -80,7 +82,7 @@ trap "cleanup" EXIT function executeQueryExpectError() { cat - > "${TMP_QUERY_FILE}" - ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a ${TMP_QUERY_FILE} + ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a "${TMP_QUERY_FILE}" } function createUser() @@ -95,7 +97,8 @@ function createUser() elif [[ "${auth_type}" == "plaintext_password" ]] then - password="${password}" + # password="${password}" + : elif [[ "${auth_type}" == "sha256_password" ]] then @@ -198,7 +201,7 @@ function testHTTPNamedSession() { echo "HTTP endpoint with named session" local HTTP_SESSION_ID - HTTP_SESSION_ID="session_id_$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 32)" + HTTP_SESSION_ID="session_id_$(tr -cd 'a-f0-9' < /dev/urandom | head -c 32)" if [ -v CLICKHOUSE_URL_PARAMS ] then CLICKHOUSE_URL_WITH_SESSION_ID="${CLICKHOUSE_URL}&session_id=${HTTP_SESSION_ID}" @@ -343,7 +346,8 @@ SET DEFAULT ROLE session_log_test_role, session_log_test_role2 TO ${username}; } # to cut off previous runs -readonly start_time="$(executeQuery <<< 'SELECT now64(6);')" +start_time="$(executeQuery <<< 'SELECT now64(6);')" +readonly start_time # Special case: user and profile are both defined in XML runEndpointTests "User with profile from XML" "no_password" "session_log_test_xml_user" '' diff --git a/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.reference b/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.reference index d308efd8662..8c39c3bf41a 100644 --- a/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.reference +++ b/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.reference @@ -1,9 +1,18 @@ 1 1 1 +1 +1 +2 +1 +2 +1 2 1 2 1 1 +1 +2 +1 2 diff --git a/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.sql b/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.sql index 59e0ca60f8f..ea88e304c42 100644 --- a/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.sql +++ b/tests/queries/0_stateless/01754_cluster_all_replicas_shard_num.sql @@ -1,10 +1,16 @@ -- Tags: replica, shard SELECT _shard_num FROM cluster('test_shard_localhost', system.one); +SELECT _shard_num FROM cluster('test_shard_localhost'); SELECT _shard_num FROM clusterAllReplicas('test_shard_localhost', system.one); +SELECT _shard_num FROM clusterAllReplicas('test_shard_localhost'); SELECT _shard_num FROM cluster('test_cluster_two_shards', system.one) ORDER BY _shard_num; +SELECT _shard_num FROM cluster('test_cluster_two_shards') ORDER BY _shard_num; SELECT _shard_num FROM clusterAllReplicas('test_cluster_two_shards', system.one) ORDER BY _shard_num; +SELECT _shard_num FROM clusterAllReplicas('test_cluster_two_shards') ORDER BY _shard_num; SELECT _shard_num FROM cluster('test_cluster_one_shard_two_replicas', system.one) ORDER BY _shard_num; +SELECT _shard_num FROM cluster('test_cluster_one_shard_two_replicas') ORDER BY _shard_num; SELECT _shard_num FROM clusterAllReplicas('test_cluster_one_shard_two_replicas', system.one) ORDER BY _shard_num; +SELECT _shard_num FROM clusterAllReplicas('test_cluster_one_shard_two_replicas') ORDER BY _shard_num; diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index 8470b4a3379..367baef142b 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -9,6 +9,7 @@ CREATE TABLE Test ENGINE = MergeTree() PRIMARY KEY (String1,String2) ORDER BY (String1,String2) +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' AS SELECT 'String1_' || toString(number) as String1, @@ -39,15 +40,15 @@ DROP TABLE IF EXISTS Test; select x, y from (select [0, 1, 2] as y, 1 as a, 2 as b) array join y as x where a = 1 and b = 2 and (x = 1 or x != 1) and x = 1; DROP TABLE IF EXISTS t; -create table t(a UInt8) engine=MergeTree order by a; +create table t(a UInt8) engine=MergeTree order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t select * from numbers(2); select a from t t1 join t t2 on t1.a = t2.a where t1.a; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (id Int64, create_time DateTime) ENGINE = MergeTree ORDER BY id; -CREATE TABLE t2 (delete_time DateTime) ENGINE = MergeTree ORDER BY delete_time; +CREATE TABLE t1 (id Int64, create_time DateTime) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE t2 (delete_time DateTime) ENGINE = MergeTree ORDER BY delete_time SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t1 values (101, '2023-05-28 00:00:00'), (102, '2023-05-28 00:00:00'); insert into t2 values ('2023-05-31 00:00:00'); diff --git a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql index f945cbde56b..00b71d6feeb 100644 --- a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql +++ b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql @@ -1,6 +1,6 @@ -- Tags: no-parallel, long DROP TABLE IF EXISTS bloom_filter_null_array; -CREATE TABLE bloom_filter_null_array (v Array(Int32), INDEX idx v TYPE bloom_filter GRANULARITY 3) ENGINE = MergeTree() ORDER BY v; +CREATE TABLE bloom_filter_null_array (v Array(Int32), INDEX idx v TYPE bloom_filter GRANULARITY 3) ENGINE = MergeTree() ORDER BY v SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO bloom_filter_null_array SELECT [number] FROM numbers(10000000); SELECT COUNT() FROM bloom_filter_null_array; SELECT COUNT() FROM bloom_filter_null_array WHERE has(v, 0); diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql index bc2f6f7c91f..f33573e503a 100644 --- a/tests/queries/0_stateless/01780_column_sparse_alter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/01780_column_sparse_filter.sql b/tests/queries/0_stateless/01780_column_sparse_filter.sql index 45958b5c4e0..f52beba50b0 100644 --- a/tests/queries/0_stateless/01780_column_sparse_filter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_filter.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS t_sparse; CREATE TABLE t_sparse (id UInt64, u UInt64, s String) ENGINE = MergeTree ORDER BY id -SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9, index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_sparse SELECT number, diff --git a/tests/queries/0_stateless/01787_map_remote.reference b/tests/queries/0_stateless/01787_map_remote.reference index 1c488d4418e..c7828769f9f 100644 --- a/tests/queries/0_stateless/01787_map_remote.reference +++ b/tests/queries/0_stateless/01787_map_remote.reference @@ -1,2 +1,4 @@ {'a':1,'b':2} {'a':1,'b':2} +{'a':1,'b':2} +{'a':1,'b':2} diff --git a/tests/queries/0_stateless/01787_map_remote.sql b/tests/queries/0_stateless/01787_map_remote.sql index 748316c8044..217308e5141 100644 --- a/tests/queries/0_stateless/01787_map_remote.sql +++ b/tests/queries/0_stateless/01787_map_remote.sql @@ -1 +1,2 @@ SELECT map('a', 1, 'b', 2) FROM remote('127.0.0.{1,2}', system, one); +SELECT map('a', 1, 'b', 2) FROM remote('127.0.0.{1,2}'); diff --git a/tests/queries/0_stateless/01825_type_json_sparse.sql b/tests/queries/0_stateless/01825_type_json_sparse.sql index 343013cb3da..cc7c66382a3 100644 --- a/tests/queries/0_stateless/01825_type_json_sparse.sql +++ b/tests/queries/0_stateless/01825_type_json_sparse.sql @@ -7,7 +7,7 @@ SET allow_experimental_object_type = 1; CREATE TABLE t_json_sparse (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1, -min_bytes_for_wide_part = 0; +min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; SYSTEM STOP MERGES t_json_sparse; diff --git a/tests/queries/0_stateless/01848_http_insert_segfault.sh b/tests/queries/0_stateless/01848_http_insert_segfault.sh index 1f2e9eebcdc..6397068a77a 100755 --- a/tests/queries/0_stateless/01848_http_insert_segfault.sh +++ b/tests/queries/0_stateless/01848_http_insert_segfault.sh @@ -6,4 +6,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||: +${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table does not exist" && echo 'Ok.' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/01861_explain_pipeline.sql b/tests/queries/0_stateless/01861_explain_pipeline.sql index aafecf57af1..93c82b6e265 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.sql +++ b/tests/queries/0_stateless/01861_explain_pipeline.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS test; -CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a; +CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test select number, number from numbers(5); INSERT INTO test select number, number from numbers(5,2); set max_threads =1; diff --git a/tests/queries/0_stateless/01880_remote_ipv6.sql b/tests/queries/0_stateless/01880_remote_ipv6.sql index 057b3ad7ec6..7f15449e556 100644 --- a/tests/queries/0_stateless/01880_remote_ipv6.sql +++ b/tests/queries/0_stateless/01880_remote_ipv6.sql @@ -10,3 +10,14 @@ SELECT * FROM remote('::1', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1][::1]', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1][::1', system.one) FORMAT Null; -- { serverError 36 } SELECT * FROM remote('[::1]::1]', system.one) FORMAT Null; -- { serverError 36 } + +SELECT * FROM remote('[::1]') FORMAT Null; +SELECT * FROM remote('[::1]:9000') FORMAT Null; + +SELECT * FROM remote('[::1') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('::1]') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('::1') FORMAT Null; -- { serverError 36 } + +SELECT * FROM remote('[::1][::1]') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('[::1][::1') FORMAT Null; -- { serverError 36 } +SELECT * FROM remote('[::1]::1]') FORMAT Null; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01906_lc_in_bug.sql b/tests/queries/0_stateless/01906_lc_in_bug.sql index 581053e14e1..035e1fa155f 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.sql +++ b/tests/queries/0_stateless/01906_lc_in_bug.sql @@ -8,6 +8,6 @@ select count() as c, x in ('a', 'bb') as g from tab group by g order by c; drop table if exists tab; -- https://github.com/ClickHouse/ClickHouse/issues/44503 -CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key); +CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select intDiv(number,100) from numbers(10000000); SELECT COUNT() FROM test WHERE key <= 100000 AND (NOT (toLowCardinality('') IN (SELECT ''))); diff --git a/tests/queries/0_stateless/01913_quantile_deterministic.sh b/tests/queries/0_stateless/01913_quantile_deterministic.sh index 5a2c7279678..a9c57a61c33 100755 --- a/tests/queries/0_stateless/01913_quantile_deterministic.sh +++ b/tests/queries/0_stateless/01913_quantile_deterministic.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS d" -${CLICKHOUSE_CLIENT} --query "CREATE TABLE d (oid UInt64) ENGINE = MergeTree ORDER BY oid" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE d (oid UInt64) ENGINE = MergeTree ORDER BY oid SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} --min_insert_block_size_rows 0 --min_insert_block_size_bytes 0 --max_block_size 8192 --query "insert into d select * from numbers(1000000)" # In previous ClickHouse versions there was a mistake that makes quantileDeterministic functions not really deterministic (in edge cases). diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 89eecbc3987..54c7ae59894 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -17,3 +17,4 @@ with client(name="client1>", log=log) as client1: client1.send("SELECT number FROM numbers(1000) FORMAT Null") client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Peak memory usage: .*B" + end_of_block) diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference deleted file mode 100644 index 9e388b62601..00000000000 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference +++ /dev/null @@ -1,3 +0,0 @@ -Loaded 1 queries. -I/O error: Too many open files -70 diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh deleted file mode 100755 index 7740ffcce7b..00000000000 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ubsan -# Tag no-ubsan: Limits RLIMIT_NOFILE, see comment in the test - -# shellcheck disable=SC2086 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# NOTE: Tests with limit for number of opened files cannot be run under UBsan. -# -# UBsan needs to create pipe each time it need to check the type: -# -# pipe() -# __sanitizer::IsAccessibleMemoryRange(unsigned long, unsigned long) -# __ubsan::checkDynamicType(void*, void*, unsigned long) + 271 -# HandleDynamicTypeCacheMiss(__ubsan::DynamicTypeCacheMissData*, unsigned long, unsigned long, __ubsan::ReportOptions) + 34 -# __ubsan_handle_dynamic_type_cache_miss_abort + 58 -# -# Obviously it will fail if RLIMIT_NOFILE exceeded (like in this test), and the UBsan will falsely report [1]: -# -# 01955_clickhouse_benchmark_connection_hang: [ FAIL ] 1.56 sec. - result differs with reference: -# --- /usr/share/clickhouse-test/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.reference 2021-07-21 11:14:58.000000000 +0300 -# +++ /tmp/clickhouse-test/0_stateless/01955_clickhouse_benchmark_connection_hang.stdout 2021-07-21 11:53:45.684050372 +0300 -# @@ -1,3 +1,22 @@ -# Loaded 1 queries. -# -I/O error: Too many open files -# -70 -# +../contrib/libcxx/include/memory:3212:19: runtime error: member call on address 0x00002939d5c0 which does not point to an object of type 'std::__1::__shared_weak_count' -# +0x00002939d5c0: note: object has invalid vptr -# + -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Can't create a socket pair to start external symbolizer (errno: 24) -# +==558==WARNING: Failed to use and restart external symbolizer! -# + #0 0xfe86b57 (/usr/bin/clickhouse+0xfe86b57) -# + #1 0xfe83fd7 (/usr/bin/clickhouse+0xfe83fd7) -# + #2 0xfe89af4 (/usr/bin/clickhouse+0xfe89af4) -# + #3 0xfe81fa9 (/usr/bin/clickhouse+0xfe81fa9) -# + #4 0x1f377609 (/usr/bin/clickhouse+0x1f377609) -# + #5 0xfe7e2a1 (/usr/bin/clickhouse+0xfe7e2a1) -# + #6 0xfce1003 (/usr/bin/clickhouse+0xfce1003) -# + #7 0x7f3345bd30b2 (/lib/x86_64-linux-gnu/libc.so.6+0x270b2) -# + #8 0xfcbf0ed (/usr/bin/clickhouse+0xfcbf0ed) -# + -# +SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../contrib/libcxx/include/memory:3212:19 in -# +1 -# -# Stacktrace from lldb: -# -# thread #1, name = 'clickhouse-benc', stop reason = Dynamic type mismatch -# * frame #0: 0x000000000fffc070 clickhouse`__ubsan_on_report -# frame #1: 0x000000000fff6511 clickhouse`__ubsan::Diag::~Diag() + 209 -# frame #2: 0x000000000fffcb11 clickhouse`HandleDynamicTypeCacheMiss(__ubsan::DynamicTypeCacheMissData*, unsigned long, unsigned long, __ubsan::ReportOptions) + 609 -# frame #3: 0x000000000fffcf2a clickhouse`__ubsan_handle_dynamic_type_cache_miss_abort + 58 -# frame #4: 0x00000000101a33f8 clickhouse`std::__1::shared_ptr::PoolEntryHelper>::~shared_ptr(this=) + 152 at memory:3212 -# frame #5: 0x00000000101a267a clickhouse`PoolBase::Entry::~Entry(this=) + 26 at PoolBase.h:67 -# frame #6: 0x00000000101a0878 clickhouse`DB::ConnectionPool::get(this=, timeouts=0x00007fffffffc278, settings=, force_connected=true) + 664 at ConnectionPool.h:93 -# frame #7: 0x00000000101a6395 clickhouse`DB::Benchmark::runBenchmark(this=) + 981 at Benchmark.cpp:309 -# frame #8: 0x000000001019e84a clickhouse`DB::Benchmark::main(this=0x00007fffffffd8c8, (null)=) + 586 at Benchmark.cpp:128 -# frame #9: 0x000000001f5d028a clickhouse`Poco::Util::Application::run(this=0x00007fffffffd8c8) + 42 at Application.cpp:334 -# frame #10: 0x000000001019ab42 clickhouse`mainEntryClickHouseBenchmark(argc=, argv=) + 6978 at Benchmark.cpp:655 -# frame #11: 0x000000000fffdfc4 clickhouse`main(argc_=, argv_=) + 356 at main.cpp:366 -# frame #12: 0x00007ffff7de6d0a libc.so.6`__libc_start_main(main=(clickhouse`main at main.cpp:339), argc=7, argv=0x00007fffffffe1e8, init=, fini=, rtld_fini=, stack_end=0x00007fffffffe1d8) + 234 at libc-start.c:308 -# frame #13: 0x000000000ffdc0aa clickhouse`_start + 42 - -# Limit number of files to 50, and we will get EMFILE for some of socket() -prlimit --nofile=50 $CLICKHOUSE_BENCHMARK --iterations 1 --concurrency 50 --query 'select 1' 2>&1 -echo $? diff --git a/tests/queries/0_stateless/02000_table_function_cluster_macros.reference b/tests/queries/0_stateless/02000_table_function_cluster_macros.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02000_table_function_cluster_macros.reference +++ b/tests/queries/0_stateless/02000_table_function_cluster_macros.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02000_table_function_cluster_macros.sql b/tests/queries/0_stateless/02000_table_function_cluster_macros.sql index f1bc1358b55..d133f5fdc2c 100644 --- a/tests/queries/0_stateless/02000_table_function_cluster_macros.sql +++ b/tests/queries/0_stateless/02000_table_function_cluster_macros.sql @@ -1,2 +1,4 @@ SELECT _shard_num FROM cluster("{default_cluster_macro}", system.one); +SELECT _shard_num FROM cluster("{default_cluster_macro}"); SELECT _shard_num FROM clusterAllReplicas("{default_cluster_macro}", system.one); +SELECT _shard_num FROM clusterAllReplicas("{default_cluster_macro}"); diff --git a/tests/queries/0_stateless/02012_get_server_port.sql b/tests/queries/0_stateless/02012_get_server_port.sql index cc7fecb0bf0..2cf2014cfcc 100644 --- a/tests/queries/0_stateless/02012_get_server_port.sql +++ b/tests/queries/0_stateless/02012_get_server_port.sql @@ -1,3 +1,3 @@ select getServerPort('tcp_port'); -select getServerPort('unknown'); -- { serverError 170 } +select getServerPort('unknown'); -- { serverError CLUSTER_DOESNT_EXIST } diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 3abf5c52031..32ce1643d4e 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index e0f0114d030..0219a0421cb 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check set -eu diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index 7df15ab33c4..6fbde71ff98 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-fasttest +-- Tags: no-fasttest DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; @@ -6,15 +6,18 @@ DROP TABLE IF EXISTS partslost_2; CREATE TABLE partslost_0 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '0') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE partslost_1 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '1') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE partslost_2 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '2') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, + index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO partslost_0 SELECT toString(number) AS x from system.numbers LIMIT 10000; diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh index ba7aac94ddb..48f27a4d1f3 100755 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" 2>&1 | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" $CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" @@ -63,8 +63,8 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" 2>&1 | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" 2>&1 | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" $CLICKHOUSE_CLIENT -q "DROP TABLE test_02102" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index acff6e0efb2..67fbc894ed3 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -50,7 +50,8 @@ CREATE TABLE system.clusters `estimated_recovery_time` UInt32, `database_shard_name` String, `database_replica_name` String, - `is_active` Nullable(UInt8) + `is_active` Nullable(UInt8), + `name` String ) ENGINE = SystemClusters COMMENT 'SYSTEM TABLE is built on the fly.' @@ -298,7 +299,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -364,6 +365,7 @@ CREATE TABLE system.merges `source_part_paths` Array(String), `result_part_path` String, `partition_id` String, + `partition` String, `is_mutation` UInt8, `total_size_bytes_compressed` UInt64, `total_size_bytes_uncompressed` UInt64, @@ -586,10 +588,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM PULLING REPLICATION LOG' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 136, 'SYSTEM FLUSH' = 137, 'SYSTEM THREAD FUZZER' = 138, 'SYSTEM UNFREEZE' = 139, 'SYSTEM FAILPOINT' = 140, 'SYSTEM LISTEN' = 141, 'SYSTEM' = 142, 'dictGet' = 143, 'displaySecretsInShowAndSelect' = 144, 'addressToLine' = 145, 'addressToLineWithInlines' = 146, 'addressToSymbol' = 147, 'demangle' = 148, 'INTROSPECTION' = 149, 'FILE' = 150, 'URL' = 151, 'REMOTE' = 152, 'MONGO' = 153, 'REDIS' = 154, 'MEILISEARCH' = 155, 'MYSQL' = 156, 'POSTGRES' = 157, 'SQLITE' = 158, 'ODBC' = 159, 'JDBC' = 160, 'HDFS' = 161, 'S3' = 162, 'HIVE' = 163, 'AZURE' = 164, 'SOURCES' = 165, 'CLUSTER' = 166, 'ALL' = 167, 'NONE' = 168)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' @@ -960,6 +962,7 @@ CREATE TABLE system.role_grants `user_name` Nullable(String), `role_name` Nullable(String), `granted_role_name` String, + `granted_role_id` UUID, `granted_role_is_default` UInt8, `with_admin_option` UInt8 ) diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql index 5e662bd7842..0834b76d4ec 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql @@ -5,7 +5,7 @@ SET read_in_order_two_level_merge_threshold=100; DROP TABLE IF EXISTS t_read_in_order; CREATE TABLE t_read_in_order(date Date, i UInt64, v UInt64) -ENGINE = MergeTree ORDER BY (date, i); +ENGINE = MergeTree ORDER BY (date, i) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_read_in_order SELECT '2020-10-10', number % 10, number FROM numbers(100000); INSERT INTO t_read_in_order SELECT '2020-10-11', number % 10, number FROM numbers(100000); @@ -55,7 +55,7 @@ SELECT a, b FROM t_read_in_order WHERE a = 1 ORDER BY b DESC SETTINGS read_in_or DROP TABLE t_read_in_order; CREATE TABLE t_read_in_order(dt DateTime, d Decimal64(5), v UInt64) -ENGINE = MergeTree ORDER BY (toStartOfDay(dt), d); +ENGINE = MergeTree ORDER BY (toStartOfDay(dt), d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_read_in_order SELECT toDateTime('2020-10-10 00:00:00') + number, 1 / (number % 100 + 1), number FROM numbers(1000); diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh index da2dcd055ea..114f60cc393 100755 --- a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_index_hypothesis" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" $CLICKHOUSE_CLIENT -q "INSERT INTO t_index_hypothesis SELECT number, number + 1 FROM numbers(10000000)" diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh index fd6e44577d9..bf79e5f769d 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh @@ -17,9 +17,9 @@ prepare_table() { table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;" if [ -z "$1" ]; then - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" else - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" fi $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;" for ((i = 1; i <= max_threads; i++)); do diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh index 703b2c4357c..77b9b2942c5 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, distributed, no-tsan +# Tags: long, distributed, no-tsan, no-debug # These tests don't use `current_database = currentDatabase()` condition, because database name isn't propagated during remote queries. @@ -19,9 +19,9 @@ prepare_table() { table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;" if [ -z "$1" ]; then - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" else - $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;" + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" fi $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;" for ((i = 1; i <= max_threads; i++)); do diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql index 83d8d23264e..c2b97231145 100644 --- a/tests/queries/0_stateless/02151_lc_prefetch.sql +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -1,6 +1,6 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug drop table if exists tab_lc; -CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x; +CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into tab_lc select number, toString(number % 10) from numbers(20000000); optimize table tab_lc; select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql index 50a10834e64..86cfff21446 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql @@ -6,7 +6,7 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_ON', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_ON', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 1; @@ -16,17 +16,17 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_GLOBAL_OFF', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 0; WITH - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), - ( SELECT sleep(0.0001) FROM system.one ), + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_NEW_ANALYZER', a5 FROM system.numbers LIMIT 100 +SELECT '02177_CTE_NEW_ANALYZER', a1, a2, a3, a4, a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index 280627954d9..f25df59f4b4 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ b/tests/queries/0_stateless/02177_issue_31009.sql @@ -5,8 +5,8 @@ SET max_threads=0; DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; -CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key; -CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO left SELECT number, toString(number) FROM numbers(25367182); INSERT INTO right SELECT number, toString(number) FROM numbers(23124707); diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference index 821fddbb933..0cf7913e28e 100644 --- a/tests/queries/0_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -1,4 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `TABLE_CATALOG` String ALIAS table_catalog,\n `COLUMN_NAME` String ALIAS column_name,\n `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,\n `COLUMN_DEFAULT` String ALIAS column_default,\n `IS_NULLABLE` String ALIAS is_nullable,\n `DATA_TYPE` String ALIAS data_type,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,\n `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,\n `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,\n `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,\n `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,\n `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,\n `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,\n `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,\n `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,\n `COLLATION_NAME` Nullable(String) ALIAS collation_name,\n `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,\n `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,\n `DOMAIN_NAME` Nullable(String) ALIAS domain_name,\n `COLUMN_COMMENT` String ALIAS column_comment,\n `COLUMN_TYPE` String ALIAS column_type\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n database AS TABLE_SCHEMA,\n table AS table_name,\n table AS TABLE_NAME,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n type AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n NULL AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n NULL AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n type AS column_type\nFROM system.columns CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables +CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 26646bd91a0..9cb4fb939e7 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-upgrade-check +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql b/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql index 71321b4dfe4..3b474369c98 100644 --- a/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql +++ b/tests/queries/0_stateless/02233_set_enable_with_statement_cte_perf.sql @@ -1,8 +1,8 @@ DROP TABLE IF EXISTS ev; DROP TABLE IF EXISTS idx; -CREATE TABLE ev (a Int32, b Int32) Engine=MergeTree() ORDER BY a; -CREATE TABLE idx (a Int32) Engine=MergeTree() ORDER BY a; +CREATE TABLE ev (a Int32, b Int32) Engine=MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE idx (a Int32) Engine=MergeTree() ORDER BY a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO ev SELECT number, number FROM numbers(10000000); INSERT INTO idx SELECT number * 5 FROM numbers(1000); diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index 1de6447172d..dc8fceddc52 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -12,7 +12,8 @@ CREATE TABLE t_1 ) ENGINE = MergeTree PARTITION BY toYYYYMM(p_time) -ORDER BY order_0; +ORDER BY order_0 +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE t_random_1 ( diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference index eb6c9305ebd..3976691a7dc 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -12,7 +12,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', enable_bypass_cache_with_threashold = 1, bypass_cache_threashold = 100, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index 8871f8655dd..496f288eb9c 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -15,7 +15,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', enable_bypass_cache_with_threashold = 1, bypass_cache_threashold = 100, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 26340c271e1..b06ab436dd8 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -14,7 +14,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', cache_on_write_operations= 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index d85b3f543e1..b1a64cb47cf 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -17,7 +17,7 @@ SETTINGS min_bytes_for_wide_part = 10485760, disk = disk( type = cache, max_size = '128Mi', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', cache_on_write_operations= 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index b3b7d12d219..c6099b96d31 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -24,8 +24,8 @@ state: DOWNLOADED 8 2014 8 2014 24 84045 -35 168815 -44 252113 +32 167243 +41 250541 5010500 18816 Using storage policy: local_cache @@ -54,7 +54,7 @@ state: DOWNLOADED 8 2014 8 2014 24 84045 -35 168815 -44 252113 +32 167243 +41 250541 5010500 18816 diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 8f387333c33..2af54276469 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel # Test tries to reproduce a race between threads: # - deletes user diff --git a/tests/queries/0_stateless/02243_drop_user_grant_race.sh b/tests/queries/0_stateless/02243_drop_user_grant_race.sh index 46ad776006e..4dce8e8124c 100755 --- a/tests/queries/0_stateless/02243_drop_user_grant_race.sh +++ b/tests/queries/0_stateless/02243_drop_user_grant_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-fasttest, no-parallel, no-upgrade-check +# Tags: race, no-fasttest, no-parallel set -e diff --git a/tests/queries/0_stateless/02244_make_datetime.sql b/tests/queries/0_stateless/02244_make_datetime.sql index 9b8f561994b..a3d88d89682 100644 --- a/tests/queries/0_stateless/02244_make_datetime.sql +++ b/tests/queries/0_stateless/02244_make_datetime.sql @@ -20,7 +20,7 @@ select makeDateTime(1984, 1, 41, 0, 0, 0, 'UTC'); select makeDateTime(1984, 1, 1, 25, 0, 0, 'UTC'); select makeDateTime(1984, 1, 1, 0, 70, 0, 'UTC'); select makeDateTime(1984, 1, 1, 0, 0, 70, 'UTC'); -select makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone'); -- { serverError 1000 } +select makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone'); -- { serverError BAD_ARGUMENTS } select makeDateTime(1984, 1, 1, 0, 0, 0, 'UTC'); select makeDateTime(1983, 2, 29, 0, 0, 0, 'UTC'); diff --git a/tests/queries/0_stateless/02245_make_datetime64.sql b/tests/queries/0_stateless/02245_make_datetime64.sql index 054cbd8956d..a7b3a3d23c5 100644 --- a/tests/queries/0_stateless/02245_make_datetime64.sql +++ b/tests/queries/0_stateless/02245_make_datetime64.sql @@ -38,7 +38,7 @@ select makeDateTime64(1984, 1, 41, 0, 0, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 25, 0, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 0, 70, 0, 0, 9, 'UTC'); select makeDateTime64(1984, 1, 1, 0, 0, 70, 0, 9, 'UTC'); -select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9, 'not a timezone'); -- { serverError 1000 } +select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9, 'not a timezone'); -- { serverError BAD_ARGUMENTS } select makeDateTime64(1984, 1, 1, 2, 3, 4, 5, 9, 'UTC'); select makeDateTime64(1984, 2, 29, 2, 3, 4, 5, 9, 'UTC'); diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference index d43b658d5d4..f78c46e54f4 100644 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.reference @@ -1,3 +1,3 @@ with_on_cluster_02250_ON_CLUSTER_grant_default without_on_cluster_02250_ON_CLUSTER_grant_default -Not enough privileges. To execute this query it's necessary to have grant CLUSTER ON *.*. (ACCESS_DENIED) +Not enough privileges. To execute this query, it's necessary to have the grant CLUSTER ON *.*. (ACCESS_DENIED) diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh index 6129d0d39c6..66417e9694a 100755 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh @@ -27,5 +27,5 @@ echo "with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" $CLICKHOUSE_CLIENT --user "with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" -q "CREATE DATABASE IF NOT EXISTS db_with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME ON CLUSTER test_shard_localhost" >/dev/null echo "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" $CLICKHOUSE_CLIENT --user "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" -q "CREATE DATABASE IF NOT EXISTS db_without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME ON CLUSTER test_shard_localhost" |& { - grep -m1 -F -o "Not enough privileges. To execute this query it's necessary to have grant CLUSTER ON *.*. (ACCESS_DENIED)" + grep -m1 -F -o "Not enough privileges. To execute this query, it's necessary to have the grant CLUSTER ON *.*. (ACCESS_DENIED)" } diff --git a/tests/queries/0_stateless/02253_empty_part_checksums.reference b/tests/queries/0_stateless/02253_empty_part_checksums.reference new file mode 100644 index 00000000000..65a8c9ee65e --- /dev/null +++ b/tests/queries/0_stateless/02253_empty_part_checksums.reference @@ -0,0 +1,8 @@ +1 +1000 +0 +0 +0 +1 +0 +0_0_0_0 Wide 370db59d5dcaef5d762b11d319c368c7 514a8be2dac94fd039dbd230065e58a4 b324ada5cd6bb14402c1e59200bd003a diff --git a/tests/queries/0_stateless/02253_empty_part_checksums.sh b/tests/queries/0_stateless/02253_empty_part_checksums.sh new file mode 100755 index 00000000000..5d4e750f46d --- /dev/null +++ b/tests/queries/0_stateless/02253_empty_part_checksums.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-replicated-database +# no-replicated-database because it adds extra replicas + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt sync;" +$CLICKHOUSE_CLIENT -q "CREATE TABLE rmt (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) +ENGINE = ReplicatedMergeTree('/test/02253/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY a PARTITION BY b % 10 +SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, +cleanup_thread_preferred_points_per_iteration=0, min_bytes_for_wide_part=0, remove_empty_parts=0" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO rmt SELECT rand(1), 0, 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(1000);" + +$CLICKHOUSE_CLIENT -q "check table rmt" +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt' and name='0_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -rf "$path" + +# detach the broken part, replace it with empty one +$CLICKHOUSE_CLIENT -q "check table rmt" 2>/dev/null +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +$CLICKHOUSE_CLIENT --receive_timeout=60 -q "system sync replica rmt" + +# the empty part should pass the check +$CLICKHOUSE_CLIENT -q "check table rmt" +$CLICKHOUSE_CLIENT -q "select count() from rmt" + +$CLICKHOUSE_CLIENT -q "select name, part_type, hash_of_all_files, hash_of_uncompressed_files, uncompressed_hash_of_compressed_files from system.parts where database=currentDatabase()" + +$CLICKHOUSE_CLIENT -q "drop table rmt sync;" diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 721e7960875..e2d1ec3980e 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -1,6 +1,6 @@ -insert into foo settings max_threads=1 +[multi] insert into foo settings max_threads=1 Syntax error (query): failed at position 40 (end of query): -insert into foo format tsv settings max_threads=1 +[multi] insert into foo format tsv settings max_threads=1 Can't format ASTInsertQuery with data, since data will be lost. [multi] insert into foo format tsv settings max_threads=1 INSERT INTO foo @@ -8,7 +8,7 @@ SETTINGS max_threads = 1 FORMAT tsv [oneline] insert into foo format tsv settings max_threads=1 INSERT INTO foo SETTINGS max_threads = 1 FORMAT tsv -insert into foo settings max_threads=1 format tsv settings max_threads=1 +[multi] insert into foo settings max_threads=1 format tsv settings max_threads=1 You have SETTINGS before and after FORMAT Cannot parse input: expected '\n' before: 'settings max_threads=1 1' 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 3d5f780a38c..8b156ffec83 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -8,7 +8,7 @@ function run_format() { local q="$1" && shift - echo "$q" + echo "[multi] $q" $CLICKHOUSE_FORMAT "$@" <<<"$q" } function run_format_both() @@ -22,18 +22,20 @@ function run_format_both() } # NOTE: that those queries may work slow, due to stack trace obtaining -run_format 'insert into foo settings max_threads=1' 2> >(grep -m1 -o "Syntax error (query): failed at position .* (end of query):") +run_format 'insert into foo settings max_threads=1' |& grep --max-count 2 --only-matching -e "Syntax error (query): failed at position .* (end of query):" -e '^\[.*$' + # compatibility -run_format 'insert into foo format tsv settings max_threads=1' 2> >(grep -m1 -F -o "Can't format ASTInsertQuery with data, since data will be lost.") +run_format 'insert into foo format tsv settings max_threads=1' |& grep --max-count 2 --only-matching -e "Can't format ASTInsertQuery with data, since data will be lost." -e '^\[.*$' run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert -run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert |& grep --max-count 2 --only-matching -e "You have SETTINGS before and after FORMAT" -e '^\[.*$' + # and via server (since this is a separate code path) $CLICKHOUSE_CLIENT -q 'drop table if exists data_02263' $CLICKHOUSE_CLIENT -q 'create table data_02263 (key Int) engine=Memory()' -$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' 2> >(grep -m1 -F -o "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'") +$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' |& grep --max-count 1 -F --only-matching "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'" $CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 format TSV settings max_threads=1 1' $CLICKHOUSE_CLIENT -q 'select * from data_02263' -$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' |& grep --max-count 1 -F --only-matching "You have SETTINGS before and after FORMAT" $CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 6b6aa53836e..2a13e11e116 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -1,6 +1,4 @@ --- Tags: long, no-upgrade-check - --- TODO(@vdimir): remove no-upgrade-check tag after https://github.com/ClickHouse/ClickHouse/pull/51737 is released +-- Tags: long, no-random-settings DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 index 621352f9c25..53fab9d6271 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 +++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 @@ -2,8 +2,8 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (key UInt32, s String) ENGINE = MergeTree ORDER BY key; -CREATE TABLE t2 (key UInt32, s String) ENGINE = MergeTree ORDER BY key; +CREATE TABLE t1 (key UInt32, s String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +CREATE TABLE t2 (key UInt32, s String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; {% set ltable_size = 10000000 -%} {% set rtable_size = 1000000 -%} diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 8e0fb4a55a0..7f7285d5472 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index f73525f92be..96dd8f6176b 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1,2 +1 @@ --- Tags: no-upgrade-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql index 08e8843f763..45390c0e8ef 100644 --- a/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql +++ b/tests/queries/0_stateless/02313_cross_join_dup_col_names.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- https://github.com/ClickHouse/ClickHouse/issues/37561 diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference index aa47d0d46d4..44e0be8e356 100644 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference +++ b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference @@ -1,2 +1,4 @@ 0 0 +0 +0 diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql index 6a5e4a0ae65..f0085b7660f 100644 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql +++ b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql @@ -4,3 +4,7 @@ SET optimize_monotonous_functions_in_order_by = 1; SELECT * FROM cluster(test_cluster_two_shards_localhost, system, one) ORDER BY toDateTime(dummy); + +SELECT * +FROM cluster(test_cluster_two_shards_localhost) +ORDER BY toDateTime(dummy) diff --git a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql index df20e5c42d4..cbf71f1d555 100644 --- a/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql +++ b/tests/queries/0_stateless/02315_pmj_union_ubsan_35857.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SET join_algorithm = 'partial_merge'; diff --git a/tests/queries/0_stateless/02316_const_string_intersact.sql b/tests/queries/0_stateless/02316_const_string_intersact.sql index 148d048952b..33629d2a56a 100644 --- a/tests/queries/0_stateless/02316_const_string_intersact.sql +++ b/tests/queries/0_stateless/02316_const_string_intersact.sql @@ -1,3 +1,2 @@ --- Tags: no-upgrade-check SELECT 'Play ClickHouse' InterSect SELECT 'Play ClickHouse' diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql index e6bc475b081..27f58dbff5e 100644 --- a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_table; CREATE TABLE test_table (`n` UInt64, `s` String) ENGINE = MergeTree -PRIMARY KEY n ORDER BY n; +PRIMARY KEY n ORDER BY n SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test_table SELECT number, concat('some string ', CAST(number, 'String')) FROM numbers(1000000); diff --git a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql index 734c597051e..1dd06a26894 100644 --- a/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql +++ b/tests/queries/0_stateless/02320_mapped_array_witn_const_nullable.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check select arrayMap(x -> toNullable(1), range(number)) from numbers(3); select arrayFilter(x -> toNullable(1), range(number)) from numbers(3); diff --git a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh index 503b94be715..5bbe31c9237 100755 --- a/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh +++ b/tests/queries/0_stateless/02332_dist_insert_send_logs_level.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-upgrade-check CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index 23512359728..bf4622adedc 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -5,7 +5,8 @@ DROP TABLE IF EXISTS t_sparse_s3; CREATE TABLE t_sparse_s3 (id UInt32, cond UInt8, s String) engine = MergeTree ORDER BY id settings ratio_of_defaults_for_sparse_serialization = 0.01, storage_policy = 's3_cache', -min_bytes_for_wide_part = 0, min_compress_block_size = 1; +min_bytes_for_wide_part = 0, min_compress_block_size = 1, +index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_sparse_s3 SELECT 1, number % 2, '' FROM numbers(8192); INSERT INTO t_sparse_s3 SELECT 2, number % 2, '' FROM numbers(24576); diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index da84cdabf79..5db1aa3e7f5 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1 +1 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0 +134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache/ 2 0 diff --git a/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql b/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql index d0d9b130b7e..c963199e05c 100644 --- a/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql +++ b/tests/queries/0_stateless/02344_distinct_limit_distiributed.sql @@ -1,7 +1,7 @@ drop table if exists t_distinct_limit; create table t_distinct_limit (d Date, id Int64) -engine = MergeTree partition by toYYYYMM(d) order by d; +engine = MergeTree partition by toYYYYMM(d) order by d SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; set max_threads = 10; diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index 07f705acd84..35ec675b7c0 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7505a03a382..7de2c7089b8 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-parallel, no-fasttest, no-debug # Tag no-parallel -- to avoid running it in parallel, this will avoid possible issues due to high pressure # Test that ensures that WRITE lock failure notifies READ. @@ -12,6 +12,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function wait_query_by_id_started() { + # As the query we are waiting for is running simultaneously, let's give it a little time to actually start. The + # queries are supposed to run for multiple seconds, so sleeping 0.5 seconds is not a big deal, especially when + # flushing the logs can take up to 3 to 5 seconds. Basically waiting a bit here we can increase the chance that we + # don't have spend precious time on flushing logs. + sleep 0.5 local query_id=$1 && shift # wait for query to be started while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql index fefb51dfcc9..dbe5d95dd1f 100644 --- a/tests/queries/0_stateless/02354_annoy_index.sql +++ b/tests/queries/0_stateless/02354_annoy_index.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 SET allow_experimental_annoy_index = 1; SET allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index c8ec40bb0a7..3e181a281a0 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; -create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple(); +create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t_2354_dist_with_external_aggr select number, toString(number) as s, toFixedString(s, 100) from numbers_mt(5e7); diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference new file mode 100644 index 00000000000..9896f149d45 --- /dev/null +++ b/tests/queries/0_stateless/02354_usearch_index.reference @@ -0,0 +1,143 @@ +--- Negative tests --- +--- Test default GRANULARITY (should be 100 mio. for usearch)--- +CREATE TABLE default.tab\n(\n `id` Int32,\n `vector` Array(Float32),\n INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `id` Int32,\n `vector` Array(Float32),\n INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +--- Test with Array, GRANULARITY = 1, index_granularity = 5 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 1/3 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 3/3 +Reference ARRAYs with non-matching dimension are rejected +Special case: MaximumDistance is negative +WHERE type, L2Distance +Special case: setting max_limit_for_ann_queries +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 +--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 1/3 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 3/3 + Skip + Name: usearch_index + Description: usearch GRANULARITY 1 + Parts: 1/1 + Granules: 3/3 +--- Test non-default metric (cosine distance) --- +--- Test with Array, GRANULARITY = 2, index_granularity = 4 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 2 + Parts: 0/1 + Granules: 2/4 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 2 + Parts: 1/1 + Granules: 4/4 +--- Test with Array, GRANULARITY = 4, index_granularity = 4 --- +WHERE type, L2Distance, check that index is used +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 4 + Parts: 0/1 + Granules: 3/4 +ORDER BY type, L2Distance, check that index is used +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: usearch_index + Description: usearch GRANULARITY 4 + Parts: 1/1 + Granules: 4/4 diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql new file mode 100644 index 00000000000..f21767ea6de --- /dev/null +++ b/tests/queries/0_stateless/02354_usearch_index.sql @@ -0,0 +1,230 @@ +-- Tags: no-fasttest +-- no-fasttest because needs usearch lib + +SET allow_experimental_usearch_index = 1; +SET allow_experimental_analyzer = 0; + +SELECT '--- Negative tests ---'; + +DROP TABLE IF EXISTS tab; + +-- must have at most 1 arguments +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('too', 'many')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } + +-- first argument (distance_function) must be String +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } + +-- must be created on single column +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index (vector, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } + +-- reject unsupported distance functions +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('wormholeDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } + +-- must be created on Array/Tuple(Float32) columns +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE tab(id Int32, vector Float32, INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Array(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Tuple(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector LowCardinality(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vector Nullable(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } + +SELECT '--- Test default GRANULARITY (should be 100 mio. for usearch)---'; + +CREATE TABLE tab (id Int32, vector Array(Float32), INDEX usearch_index(vector) TYPE usearch) ENGINE=MergeTree ORDER BY id; +SHOW CREATE TABLE tab; +DROP TABLE tab; + +CREATE TABLE tab (id Int32, vector Array(Float32)) ENGINE=MergeTree ORDER BY id; +ALTER TABLE tab ADD INDEX usearch_index(vector) TYPE usearch; +SHOW CREATE TABLE tab; + +DROP TABLE tab; + + +SELECT '--- Test with Array, GRANULARITY = 1, index_granularity = 5 ---'; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +-- rows = 15, index_granularity = 5, GRANULARITY = 1 gives 3 usearch-indexed blocks (each comprising a single granule) +-- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 1/3" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +LIMIT 3; + +-- Test special cases. Corresponding special case tests are omitted from later tests. + +SELECT 'Reference ARRAYs with non-matching dimension are rejected'; +SELECT * +FROM tab +ORDER BY L2Distance(vector, [0.0, 0.0]) +LIMIT 3; -- { serverError INCORRECT_QUERY } + +SELECT 'Special case: MaximumDistance is negative'; +SELECT 'WHERE type, L2Distance'; +SELECT * +FROM tab +WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < -1.0 +LIMIT 3; -- { serverError INCORRECT_QUERY } + +SELECT 'Special case: setting max_limit_for_ann_queries'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [5.3, 7.3, 2.1]) +LIMIT 3 +SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index + +DROP TABLE tab; + +-- Test Tuple embeddings. Triggers different logic than Array inside MergeTreeIndexUSearch but the same logic as Array above MergeTreeIndexusearch. +-- Therefore test Tuple case just once. + +SELECT '--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---'; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, (0.0, 0.0, 10.0)), (2, (0.0, 0.0, 10.5)), (3, (0.0, 0.0, 9.5)), (4, (0.0, 0.0, 9.7)), (5, (0.0, 0.0, 10.2)), (6, (10.0, 0.0, 0.0)), (7, (9.5, 0.0, 0.0)), (8, (9.7, 0.0, 0.0)), (9, (10.2, 0.0, 0.0)), (10, (10.5, 0.0, 0.0)), (11, (0.0, 10.0, 0.0)), (12, (0.0, 9.5, 0.0)), (13, (0.0, 9.7, 0.0)), (14, (0.0, 10.2, 0.0)), (15, (0.0, 10.5, 0.0)); + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, (0.0, 0.0, 10.0)) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, (0.0, 0.0, 10.0)) +LIMIT 3; + +DROP TABLE tab; + +-- Not a systematic test, just to make sure no bad things happen +SELECT '--- Test non-default metric (cosine distance) ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('cosineDistance') GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]); + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 +-- LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0]) +-- LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Test with Array, GRANULARITY = 2, index_granularity = 4 ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]); + +-- rows = 16, index_granularity = 4, GRANULARITY = 2 gives 2 usearch-indexed blocks (each comprising two granules) +-- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 2/4" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 4) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4; +INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]); + +-- rows = 16, index_granularity = 4, GRANULARITY = 4 gives a single usearch-indexed block (comprising all granules) +-- no two matches happen to be located in the same granule, so with LIMIT = 3, we'll get "Granules: 2/4" + +-- SELECT 'WHERE type, L2Distance'; +-- SELECT * +-- FROM tab +-- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +-- LIMIT 3; + +SELECT 'WHERE type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0 +LIMIT 3; + +-- SELECT 'ORDER BY type, L2Distance'; +-- SELECT * +-- FROM tab +-- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +-- LIMIT 3; + +SELECT 'ORDER BY type, L2Distance, check that index is used'; +EXPLAIN indexes=1 +SELECT * +FROM tab +ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) +LIMIT 3; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02363_mapupdate_improve.sql b/tests/queries/0_stateless/02363_mapupdate_improve.sql index b4a4b8e5d91..c3cd8fff9ee 100644 --- a/tests/queries/0_stateless/02363_mapupdate_improve.sql +++ b/tests/queries/0_stateless/02363_mapupdate_improve.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS map_test; CREATE TABLE map_test(`tags` Map(String, String)) ENGINE = MergeTree PRIMARY KEY tags ORDER BY tags SETTINGS index_granularity = 8192; INSERT INTO map_test (tags) VALUES (map('fruit','apple','color','red')); diff --git a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql index cf9f2971cb0..41334811464 100644 --- a/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql +++ b/tests/queries/0_stateless/02366_direct_dictionary_dict_has.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02366_with_fill_date.sql b/tests/queries/0_stateless/02366_with_fill_date.sql index 4d41facf423..aca57b127af 100644 --- a/tests/queries/0_stateless/02366_with_fill_date.sql +++ b/tests/queries/0_stateless/02366_with_fill_date.sql @@ -1,4 +1,3 @@ --- Tags: no-upgrade-check SELECT toDate('2022-02-01') AS d1 FROM numbers(18) AS number diff --git a/tests/queries/0_stateless/02377_modify_column_from_lc.sql b/tests/queries/0_stateless/02377_modify_column_from_lc.sql index a578e7cb03a..efee323e88d 100644 --- a/tests/queries/0_stateless/02377_modify_column_from_lc.sql +++ b/tests/queries/0_stateless/02377_modify_column_from_lc.sql @@ -9,7 +9,7 @@ CREATE TABLE t_modify_from_lc_1 a LowCardinality(UInt32) CODEC(NONE) ) ENGINE = MergeTree ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE t_modify_from_lc_2 ( @@ -17,7 +17,7 @@ CREATE TABLE t_modify_from_lc_2 a LowCardinality(UInt32) CODEC(NONE) ) ENGINE = MergeTree ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO t_modify_from_lc_1 SELECT number, number FROM numbers(100000); INSERT INTO t_modify_from_lc_2 SELECT number, number FROM numbers(100000); diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 2fe0943745d..0c228c13f19 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,4 +1,4 @@ --- Tags: no-upgrade-check, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; diff --git a/tests/queries/0_stateless/02383_join_and_filtering_set.reference b/tests/queries/0_stateless/02383_join_and_filtering_set.reference index 2ad282ca07f..822d76aafc6 100644 --- a/tests/queries/0_stateless/02383_join_and_filtering_set.reference +++ b/tests/queries/0_stateless/02383_join_and_filtering_set.reference @@ -8,3 +8,12 @@ Ok Ok Ok Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02383_join_and_filtering_set.sh b/tests/queries/0_stateless/02383_join_and_filtering_set.sh index 3356be58ff7..3a6d60811c9 100755 --- a/tests/queries/0_stateless/02383_join_and_filtering_set.sh +++ b/tests/queries/0_stateless/02383_join_and_filtering_set.sh @@ -10,6 +10,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -mn -q """ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + CREATE TABLE t1 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y AS SELECT sipHash64(number, 't1_x') % 100 AS x, sipHash64(number, 't1_y') % 100 AS y FROM numbers(100); @@ -17,6 +20,26 @@ CREATE TABLE t2 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y AS SELECT sipHash64(number, 't2_x') % 100 AS x, sipHash64(number, 't2_y') % 100 AS y FROM numbers(100); """ +# Arguments: +# - Query result +# - Processor name +# - Expected description +# - Check first occurrence +function match_description() { + +QUERY_RESULT=$1 +PROCESSOR_NAME=$2 +EXPECTED_DESCRIPTION=$3 +CHECK_FIRST_OCCURRENCE=${4:-true} + +SED_EXPR="/$PROCESSOR_NAME/{ n; s/^[ \t]*Description: //; p" +[ $CHECK_FIRST_OCCURRENCE = true ] && SED_EXPR+="; q }" || SED_EXPR+=" }" + +DESC=$(sed -n "$SED_EXPR" <<< "$QUERY_RESULT") +[[ "$DESC" == "$EXPECTED_DESCRIPTION" ]] && echo "Ok" || echo "Fail: ReadHeadBalancedProcessor description '$DESC' != '$EXPECTED_DESCRIPTION' " + +} + # Arguments: # - value of max_rows_in_set_to_optimize_join # - join kind @@ -37,10 +60,20 @@ RES=$( EXPECTED_PIPELINE_STEPS=$4 RES=$( $CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join=${PARAM_VALUE} --join_algorithm='full_sorting_merge' \ - -q "EXPLAIN PIPELINE SELECT count() FROM t1 ${JOIN_KIND} JOIN t2 ON t1.x = t2.x" \ - | grep -o -e ReadHeadBalancedProcessor -e FilterBySetOnTheFlyTransform -e CreatingSetsOnTheFlyTransform | wc -l + -q "EXPLAIN PIPELINE SELECT count() FROM t1 ${JOIN_KIND} JOIN t2 ON t1.x = t2.x" ) -[ "$RES" -eq "$EXPECTED_PIPELINE_STEPS" ] && echo "Ok" || echo "Fail: $RES != $EXPECTED_PIPELINE_STEPS" + +# Count match +COUNT=$(echo "$RES" | grep -o -e ReadHeadBalancedProcessor -e FilterBySetOnTheFlyTransform -e CreatingSetsOnTheFlyTransform | wc -l) +[ "$COUNT" -eq "$EXPECTED_PIPELINE_STEPS" ] && echo "Ok" || echo "Fail: $COUNT != $EXPECTED_PIPELINE_STEPS" + +# Description matchers +if [ "$EXPECTED_PIPELINE_STEPS" -ne 0 ]; then + match_description "$RES" 'ReadHeadBalancedProcessor' 'Reads rows from two streams evenly' + match_description "$RES" 'FilterBySetOnTheFlyTransform' "Filter rows using other join table side\'s set" + match_description "$RES" 'CreatingSetsOnTheFlyTransform' 'Create set and filter Left joined stream +Create set and filter Right joined stream' false +fi } diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 39e513f6be4..48efc98f7c7 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, disabled +# Tags: race, zookeeper, no-parallel, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index d9fac433189..41a3b6bf8ec 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -118,8 +118,7 @@ ExpressionTransform MergingAggregatedBucketTransform × 4 Resize 1 → 4 GroupingAggregatedTransform 3 → 1 - (Union) - (ReadFromRemoteParallelReplicas) + (ReadFromRemoteParallelReplicas) select a, count() from pr_t group by a order by a limit 5 offset 500; 500 1000 501 1000 diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 61a2e4e9f02..d56fb4d367d 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -302,6 +302,7 @@ formatRowNoNewline fragment fromModifiedJulianDay fromModifiedJulianDayOrNull +fromUTCTimestamp fromUnixTimestamp fromUnixTimestamp64Micro fromUnixTimestamp64Milli @@ -849,6 +850,7 @@ toUInt8 toUInt8OrDefault toUInt8OrNull toUInt8OrZero +toUTCTimestamp toUUID toUUIDOrDefault toUUIDOrNull @@ -881,11 +883,18 @@ tumble tumbleEnd tumbleStart tuple +tupleConcat tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate diff --git a/tests/queries/0_stateless/02417_load_marks_async.sh b/tests/queries/0_stateless/02417_load_marks_async.sh index a5cbcd08f75..72b35a565df 100755 --- a/tests/queries/0_stateless/02417_load_marks_async.sh +++ b/tests/queries/0_stateless/02417_load_marks_async.sh @@ -21,7 +21,7 @@ n8 UInt64, n9 UInt64 ) ENGINE = MergeTree -ORDER BY n0 SETTINGS min_bytes_for_wide_part = 1;" +ORDER BY n0 SETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192, index_granularity_bytes = '10Mi';" ${CLICKHOUSE_CLIENT} -q "INSERT INTO test select number, number % 3, number % 5, number % 10, number % 13, number % 15, number % 17, number % 18, number % 22, number % 25 from numbers(1000000)" ${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES test" diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference index 98827438920..a0689a0a090 100644 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference @@ -3,8 +3,8 @@ {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} 1 ===2=== -{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} 3 2 ===3=== diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference index 71c9a23879f..c13142dd26a 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference @@ -11,3 +11,16 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql index f5d182be3e3..fde893626c1 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql @@ -1,10 +1,30 @@ DROP TABLE IF EXISTS dtest; -SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); CREATE TABLE dtest ( `a` Decimal(18, 0), `b` Decimal(18, 1), `c` Decimal(36, 0) ) ENGINE = Memory; INSERT INTO dtest VALUES ('33', '44.4', '35'); +SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); + +SELECT count() == 0 FROM dtest WHERE a IN toDecimal32('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal64('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal128('33.3000', 4); +SELECT count() == 0 FROM dtest WHERE a IN toDecimal256('33.3000', 4); + +SELECT count() == 0 FROM dtest WHERE b IN toDecimal32('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal64('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal128('44.4000', 0); +SELECT count() == 0 FROM dtest WHERE b IN toDecimal256('44.4000', 0); + +SELECT count() == 1 FROM dtest WHERE b IN toDecimal32('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal64('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal128('44.4000', 4); +SELECT count() == 1 FROM dtest WHERE b IN toDecimal256('44.4000', 4); + +SET allow_experimental_analyzer = 1; + +SELECT count() == 0 FROM (SELECT '33.3' :: Decimal(9, 1) AS a WHERE a IN ('33.33' :: Decimal(9, 2))); + SELECT count() == 0 FROM dtest WHERE a IN toDecimal32('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal64('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal128('33.3000', 4); diff --git a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference index a89ce339f6c..9f725a55439 100644 --- a/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference +++ b/tests/queries/0_stateless/02421_truncate_isolation_no_merges.reference @@ -5,19 +5,13 @@ tx21 3 UNKNOWN_TABLE concurrent_insert 2 -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 -all_4_4_1 0 all_5_5_0 1 -all_6_6_1 0 concurrent_drop_part_before SERIALIZATION_ERROR INVALID_TRANSACTION 1 3 all_1_1_0 1 -all_2_2_1 0 all_3_3_0 1 read_from_snapshot tx51 3 @@ -28,15 +22,9 @@ tx51 3 concurrent_drop_part_after NO_SUCH_DATA_PART INVALID_TRANSACTION -all_1_1_1 0 -all_2_2_1 0 -all_3_3_1 0 NewPart all_1_1_0 -NewPart all_1_1_1 NewPart all_2_2_0 -NewPart all_2_2_1 NewPart all_3_3_0 -NewPart all_3_3_1 concurrent_truncate_notx_after tx71 3 tx71 0 diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference index 378b7d8cec4..66c3d28dfc0 100644 --- a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference @@ -19,3 +19,24 @@ 1 1 1 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql index 579f468ee54..a84cb5572ba 100644 --- a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql @@ -1,3 +1,8 @@ +DROP TABLE IF EXISTS decimal_in_float_test; + +CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; +INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); + SELECT toDecimal32(1.555,3) IN (1.5551); SELECT toDecimal32(1.555,3) IN (1.5551,1.555); SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); @@ -18,10 +23,36 @@ SELECT toDecimal256(1.555,3) IN (1.5551,1.555); SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); SELECT toDecimal256(1.555,3) IN (1.550,1.5); -DROP TABLE IF EXISTS decimal_in_float_test; -CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; -INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); +SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333); +SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44); +SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444); + +SET allow_experimental_analyzer = 1; + + +SELECT toDecimal32(1.555,3) IN (1.5551); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal32(1.555,3) IN (1.550,1.5); + +SELECT toDecimal64(1.555,3) IN (1.5551); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal64(1.555,3) IN (1.550,1.5); + +SELECT toDecimal128(1.555,3) IN (1.5551); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal128(1.555,3) IN (1.550,1.5); + +SELECT toDecimal256(1.555,3) IN (1.5551); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal256(1.555,3) IN (1.550,1.5); + SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); diff --git a/tests/queries/0_stateless/02428_delete_with_settings.sql b/tests/queries/0_stateless/02428_delete_with_settings.sql index 071a3f74184..618c08608fc 100644 --- a/tests/queries/0_stateless/02428_delete_with_settings.sql +++ b/tests/queries/0_stateless/02428_delete_with_settings.sql @@ -1,5 +1,5 @@ drop table if exists test; -create table test (id Int32, key String) engine=MergeTree() order by tuple(); +create table test (id Int32, key String) engine=MergeTree() order by tuple() settings index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select number, toString(number) from numbers(1000000); delete from test where id % 2 = 0 SETTINGS mutations_sync=0; select count() from test; diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql index 33b0ea4b818..091fbbe1711 100644 --- a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql +++ b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql @@ -1,7 +1,7 @@ -- From https://github.com/ClickHouse/ClickHouse/issues/41814 drop table if exists test; -create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d); +create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test select number, number, '2022-01-01 00:00:00' from numbers(1000000); @@ -12,7 +12,7 @@ drop table test; -- From https://github.com/ClickHouse/ClickHouse/issues/34063 drop table if exists test_null_filter; -create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key; +create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into test_null_filter select number, number from numbers(10000000); diff --git a/tests/queries/0_stateless/02429_low_cardinality_trash.sh b/tests/queries/0_stateless/02429_low_cardinality_trash.sh index 91618cb2796..e115ee0824e 100755 --- a/tests/queries/0_stateless/02429_low_cardinality_trash.sh +++ b/tests/queries/0_stateless/02429_low_cardinality_trash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index 5e2da509314..f85aaed7716 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings +# Tags: no-random-settings, no-asan, no-msan, no-tsan, no-debug # shellcheck disable=SC2009 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index dfdbbf5d597..1d01fde56d6 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -24,6 +24,11 @@ system flush logs; select * from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1' and path not like '%/block_numbers/123' - and event_time >= now() - interval 1 minute; + and event_time >= now() - interval 1 minute + -- avoid race with tests like 02311_system_zookeeper_insert + and (query_id is null or query_id='' or query_id in + (select query_id from system.query_log + where event_time >= now() - interval 1 minute and current_database=currentDatabase()) + ); drop table rmt; diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference new file mode 100644 index 00000000000..77cfb77479d --- /dev/null +++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference @@ -0,0 +1,4 @@ +default begin inserts +default end inserts +30 465 +30 465 diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh new file mode 100755 index 00000000000..3b1e4c1aa1f --- /dev/null +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib + + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; + + CREATE TABLE alter_table0 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a; + CREATE TABLE alter_table1 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a; +" || exit 1 + +function thread_detach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) DETACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} +function thread_attach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) ATTACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} + +insert_type=$(($RANDOM % 3)) + +engine=$($CLICKHOUSE_CLIENT -q "SELECT engine FROM system.tables WHERE database=currentDatabase() AND table='alter_table0'") +if [[ "$engine" == "ReplicatedMergeTree" ]]; then + insert_type=$(($RANDOM % 2)) +fi +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'insert_type $insert_type' FORMAT Null" + +function insert() +{ + # Fault injection may lead to duplicates + if [[ "$insert_type" -eq 0 ]]; then + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + elif [[ "$insert_type" -eq 1 ]]; then + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $1, $1" 2>/dev/null + else + # It may reproduce something interesting: if the insert status is unknown (due to fault injection in retries) + # and the part was committed locally but not in zk, then it will be active and DETACH may detach it. + # And we will ATTACH it later. But the next INSERT attempt will not be deduplicated because the first one failed. + # So we will get duplicates. + $CLICKHOUSE_CLIENT --insert_deduplication_token=$1 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + fi +} + +thread_detach & PID_1=$! +thread_attach & PID_2=$! +thread_detach & PID_3=$! +thread_attach & PID_4=$! + +function do_inserts() +{ + for i in {1..30}; do + while ! insert $i; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done + done +} + +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'begin inserts'" +do_inserts 2>&1| grep -Fa "Exception: " | grep -Fv "was cancelled by concurrent ALTER PARTITION" +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'end inserts'" + +kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4 +wait + +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +query_with_retry "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" + +if [[ "$engine" == "ReplicatedMergeTree" ]]; then + # ReplicatedMergeTree may duplicate data on ATTACH PARTITION (when one replica has a merged part and another replica has source parts only) + $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL DEDUPLICATE" + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +fi + +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table0" +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table1" + +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table0" +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table1" diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index abcf1bf4c5b..03c43843d3a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-upgrade-check +# Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Test that running distributed query and cancel it ASAP, # this can trigger a hung/deadlock in ProcessorList. -for i in {1..100}; do +for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do diff --git a/tests/queries/0_stateless/02457_morton_coding.sql b/tests/queries/0_stateless/02457_morton_coding.sql index 4fc26f255f4..955cb2e053b 100644 --- a/tests/queries/0_stateless/02457_morton_coding.sql +++ b/tests/queries/0_stateless/02457_morton_coding.sql @@ -11,7 +11,7 @@ create table morton_numbers_02457( n8 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; SELECT '----- CONST -----'; select mortonEncode(1,2,3,4); @@ -45,7 +45,7 @@ create table morton_numbers_1_02457( n8 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_1_02457 select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) @@ -80,7 +80,7 @@ create table morton_numbers_2_02457( n4 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_2_02457 select untuple(mortonDecode(4, mortonEncode(n1, n2, n3, n4))) @@ -114,7 +114,7 @@ create table morton_numbers_3_02457( n2 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_3_02457 select untuple(mortonDecode(2, mortonEncode(n1, n2))) diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql index 5aeb1f380be..c95205769d2 100644 --- a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql @@ -20,7 +20,7 @@ create table morton_numbers_mask_02457( n4 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number, n3.number, n4.number @@ -37,7 +37,7 @@ create table morton_numbers_mask_1_02457( n4 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_1_02457 select untuple(mortonDecode((1,2,1,2), mortonEncode((1,2,1,2), n1, n2, n3, n4))) @@ -64,7 +64,7 @@ create table morton_numbers_mask_02457( n2 UInt8 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number @@ -77,7 +77,7 @@ create table morton_numbers_mask_2_02457( n2 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_2_02457 select untuple(mortonDecode((1,4), mortonEncode((1,4), n1, n2))) @@ -105,7 +105,7 @@ create table morton_numbers_mask_02457( n3 UInt8, ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_02457 select n1.number, n2.number, n3.number @@ -120,7 +120,7 @@ create table morton_numbers_mask_3_02457( n3 UInt64 ) Engine=MergeTree() - ORDER BY n1; + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into morton_numbers_mask_3_02457 select untuple(mortonDecode((1,1,2), mortonEncode((1,1,2), n1, n2, n3))) diff --git a/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql index 39fc751f331..1c2123a99d5 100644 --- a/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql +++ b/tests/queries/0_stateless/02481_merge_array_join_sample_by.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS 02481_mergetree; DROP TABLE IF EXISTS 02481_merge; -CREATE TABLE 02481_mergetree(x UInt64, y UInt64, arr Array(String)) ENGINE = MergeTree ORDER BY x SAMPLE BY x; +CREATE TABLE 02481_mergetree(x UInt64, y UInt64, arr Array(String)) ENGINE = MergeTree ORDER BY x SAMPLE BY x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE 02481_merge(x UInt64, y UInt64, arr Array(String)) ENGINE = Merge(currentDatabase(), '^(02481_mergetree)$'); diff --git a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql index 91402bbed60..021a55ef2e8 100644 --- a/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql +++ b/tests/queries/0_stateless/02481_pk_analysis_with_enum_to_string.sql @@ -10,7 +10,7 @@ CREATE TABLE gen ) ENGINE = GenerateRandom; -CREATE TABLE github_events AS gen ENGINE=MergeTree ORDER BY (event_type, repo_name, created_at); +CREATE TABLE github_events AS gen ENGINE=MergeTree ORDER BY (event_type, repo_name, created_at) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO github_events SELECT * FROM gen LIMIT 100000; diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 4d588dabeb9..fe3cee1359e 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,23 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; diff --git a/tests/queries/0_stateless/02483_elapsed_time.sh b/tests/queries/0_stateless/02483_elapsed_time.sh index e3b983129fb..fdb23d6da01 100755 --- a/tests/queries/0_stateless/02483_elapsed_time.sh +++ b/tests/queries/0_stateless/02483_elapsed_time.sh @@ -32,7 +32,7 @@ OK_QUERY_JSON=" WITH ( SELECT sleepEachRow(1.0) ) AS sub -SELECT * +SELECT *, sub FROM ( SELECT * @@ -50,7 +50,7 @@ WITH ( SELECT * FROM ( - SELECT * + SELECT *, sub FROM system.one ) FORMAT XML diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 229c68bf8ec..63f912c6bff 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -22,7 +22,7 @@ SETTINGS min_bytes_for_wide_part = 0, type = cache, max_size = '128Mi', max_file_segment_size = '10Ki', - path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}', cache_on_write_operations = 1, enable_filesystem_query_cache_limit = 1, delayed_cleanup_interval_ms = 100, diff --git a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql index b08abcb8a19..63edad6c9e3 100644 --- a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql +++ b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql @@ -1,6 +1,6 @@ -select toDateTime(0, '/abc'); -- { serverError POCO_EXCEPTION } -select toDateTime(0, './abc'); -- { serverError POCO_EXCEPTION } -select toDateTime(0, '../abc'); -- { serverError POCO_EXCEPTION } -select toDateTime(0, '~/abc'); -- { serverError POCO_EXCEPTION } -select toDateTime(0, 'abc/../../cba'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, '/abc'); -- { serverError BAD_ARGUMENTS } +select toDateTime(0, './abc'); -- { serverError BAD_ARGUMENTS } +select toDateTime(0, '../abc'); -- { serverError BAD_ARGUMENTS } +select toDateTime(0, '~/abc'); -- { serverError BAD_ARGUMENTS } +select toDateTime(0, 'abc/../../cba'); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index b7d4a6ee93a..73d58bb6d6c 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -18,7 +18,7 @@ select count() from (select throwIf(count() != 2) from t1 group by a); drop table t1; -create table t2(a UInt32) engine=MergeTree order by tuple() partition by a % 8; +create table t2(a UInt32) engine=MergeTree order by tuple() partition by a % 8 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges t2; @@ -31,7 +31,7 @@ select count() from (select throwIf(count() != 2) from t2 group by a); drop table t2; -create table t3(a UInt32) engine=MergeTree order by tuple() partition by a % 16; +create table t3(a UInt32) engine=MergeTree order by tuple() partition by a % 16 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges t3; @@ -53,7 +53,7 @@ drop table t3; set optimize_aggregation_in_order = 1; -create table t4(a UInt32) engine=MergeTree order by a partition by a % 4; +create table t4(a UInt32) engine=MergeTree order by a partition by a % 4 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges t4; @@ -66,7 +66,7 @@ select count() from (select throwIf(count() != 2) from t4 group by a); drop table t4; -create table t5(a UInt32) engine=MergeTree order by a partition by a % 8; +create table t5(a UInt32) engine=MergeTree order by a partition by a % 8 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges t5; @@ -79,7 +79,7 @@ select count() from (select throwIf(count() != 2) from t5 group by a); drop table t5; -create table t6(a UInt32) engine=MergeTree order by a partition by a % 16; +create table t6(a UInt32) engine=MergeTree order by a partition by a % 16 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges t6; @@ -94,7 +94,7 @@ drop table t6; set optimize_aggregation_in_order = 0; -create table t7(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2); +create table t7(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t7 select number from numbers_mt(100); @@ -104,7 +104,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t7; -create table t8(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2) * 2 + 1; +create table t8(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2) * 2 + 1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t8 select number from numbers_mt(100); @@ -114,7 +114,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t8; -create table t9(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2); +create table t9(a UInt32) engine=MergeTree order by a partition by intDiv(a, 2) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t9 select number from numbers_mt(100); @@ -124,7 +124,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t9; -create table t10(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)); +create table t10(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t10 select number, number from numbers_mt(100); @@ -135,7 +135,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t10; -- multiplication by 2 is not injective, so optimization is not applicable -create table t11(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)); +create table t11(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t11 select number, number from numbers_mt(100); @@ -155,7 +155,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t12; -create table t13(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)); +create table t13(a UInt32, b UInt32) engine=MergeTree order by a partition by (intDiv(a, 2), intDiv(b, 3)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t13 select number, number from numbers_mt(100); @@ -165,7 +165,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t13; -create table t14(a UInt32, b UInt32) engine=MergeTree order by a partition by intDiv(a, 2) + intDiv(b, 3); +create table t14(a UInt32, b UInt32) engine=MergeTree order by a partition by intDiv(a, 2) + intDiv(b, 3) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t14 select number, number from numbers_mt(100); @@ -176,7 +176,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t14; -- to few partitions -- -create table t15(a UInt32, b UInt32) engine=MergeTree order by a partition by a < 90; +create table t15(a UInt32, b UInt32) engine=MergeTree order by a partition by a < 90 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t15 select number, number from numbers_mt(100); @@ -188,7 +188,7 @@ settings force_aggregate_partitions_independently = 0; drop table t15; -- to many partitions -- -create table t16(a UInt32, b UInt32) engine=MergeTree order by a partition by a % 16; +create table t16(a UInt32, b UInt32) engine=MergeTree order by a partition by a % 16 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t16 select number, number from numbers_mt(100); @@ -200,7 +200,7 @@ settings force_aggregate_partitions_independently = 0, max_number_of_partitions_ drop table t16; -- to big skew -- -create table t17(a UInt32, b UInt32) engine=MergeTree order by a partition by a < 90; +create table t17(a UInt32, b UInt32) engine=MergeTree order by a partition by a < 90 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t17 select number, number from numbers_mt(100); @@ -211,7 +211,7 @@ settings force_aggregate_partitions_independently = 0, max_threads = 4; drop table t17; -create table t18(a UInt32, b UInt32) engine=MergeTree order by a partition by a; +create table t18(a UInt32, b UInt32) engine=MergeTree order by a partition by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t18 select number, number from numbers_mt(50); @@ -221,7 +221,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t18; -create table t19(a UInt32, b UInt32) engine=MergeTree order by a partition by a; +create table t19(a UInt32, b UInt32) engine=MergeTree order by a partition by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t19 select number, number from numbers_mt(50); @@ -231,7 +231,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t19; -create table t20(a UInt32, b UInt32) engine=MergeTree order by a partition by a; +create table t20(a UInt32, b UInt32) engine=MergeTree order by a partition by a SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t20 select number, number from numbers_mt(50); @@ -241,7 +241,7 @@ select replaceRegexpOne(explain, '^[ ]*(.*)', '\\1') from ( drop table t20; -create table t21(a UInt64, b UInt64) engine=MergeTree order by a partition by a % 16; +create table t21(a UInt64, b UInt64) engine=MergeTree order by a partition by a % 16 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t21 select number, number from numbers_mt(1e6); @@ -249,7 +249,7 @@ select a from t21 group by a limit 10 format Null; drop table t21; -create table t22(a UInt32, b UInt32) engine=SummingMergeTree order by a partition by a % 16; +create table t22(a UInt32, b UInt32) engine=SummingMergeTree order by a partition by a % 16 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into t22 select number, number from numbers_mt(1e6); diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql index 1600761bb84..6bb8b5444e5 100644 --- a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS lwd_test_02521; CREATE TABLE lwd_test_02521 (id UInt64, value String, event_time DateTime) ENGINE MergeTree() ORDER BY id -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() - INTERVAL 2 MONTH FROM numbers(50000); INSERT INTO lwd_test_02521 SELECT number, randomString(10), now() FROM numbers(50000); @@ -42,4 +42,4 @@ SELECT 'Count', count() FROM lwd_test_02521; -- { echoOff } -DROP TABLE lwd_test_02521; \ No newline at end of file +DROP TABLE lwd_test_02521; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index ccc514e7ea2..e3fae07333a 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -191,9 +191,48 @@ E51B38608EF25F57 1 1 E28DBDE7FE22E41C -1 +1CE422FEE7BD8DE20000000000000000 E28DBDE7FE22E41C -1 +1CE422FEE7BD8DE20000000000000000 Check bug with hashing of const integer values 11862823756610506724 11862823756610506724 +86AE90BB6A238D3F6221457630142C9B +86AE90BB6A238D3F6221457630142C9B +Check memsan bug +18096612095653370192 +20AF99D3A87829E0 +12489502208762728797 +Check const columns +15080046610211022027 +15080046610211022027 +15080046610211022027 +15080046610211022027 +2E779C73D13981AA1AE19AFF9617EA49 +2E779C73D13981AA1AE19AFF9617EA49 +2E779C73D13981AA1AE19AFF9617EA49 +2E779C73D13981AA1AE19AFF9617EA49 +Check multiple keys as tuple from a table +11862823756610506724 +9357996107237883963 +86AE90BB6A238D3F6221457630142C9B +F6D93D8FEA6D7DECCDD95A7A0A2AA36D +Check multiple keys as separate ints from a table +11862823756610506724 +9357996107237883963 +86AE90BB6A238D3F6221457630142C9B +F6D93D8FEA6D7DECCDD95A7A0A2AA36D +Check constant key and data from a table +11862823756610506724 +11862823756610506724 +86AE90BB6A238D3F6221457630142C9B +86AE90BB6A238D3F6221457630142C9B +Check multiple keys as separate ints from a table with constant data +11862823756610506724 +9357996107237883963 +86AE90BB6A238D3F6221457630142C9B +F6D93D8FEA6D7DECCDD95A7A0A2AA36D +Check asan bug +0 +Check bug found fuzzing +9042C6691B1A75F0EA3314B6F55728BB diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 900b99f548a..112ae15bf46 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,15 +263,15 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError 48 } -select sipHash128Keyed((0, 0), '1'); -- { serverError 48 } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } +select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } select hex(sipHash64()); -SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; +SELECT hex(sipHash128()); select hex(sipHash64Keyed()); -SELECT hex(sipHash128Keyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; +SELECT hex(sipHash128Keyed()); SELECT 'Check bug with hashing of const integer values'; DROP TABLE IF EXISTS tab; @@ -280,4 +280,61 @@ INSERT INTO tab VALUES ((2, 2), 4); -- these two statements must produce the same result SELECT sipHash64Keyed(key, val) FROM tab; SELECT sipHash64Keyed(key, 4::UInt64) FROM tab; +SELECT hex(sipHash128Keyed(key, val)) FROM tab; +SELECT hex(sipHash128Keyed(key, 4::UInt64)) FROM tab; DROP TABLE tab; + +SELECT 'Check memsan bug'; +SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2); +SELECT hex(sipHash64Keyed((toUInt64(9223372036854775806), toUInt64(-9223372036854775808)), char(2147483646, -2147483648, 1, 3, 4, 7, 2147483647))) GROUP BY toUInt64(257), (toUInt64(9223372036854775806), toUInt64(2147483646)); +SELECT sipHash64Keyed((toUInt64(9223372036854775806), 9223372036854775808::UInt64), char(2)) GROUP BY toUInt64(9223372036854775806); + +SELECT 'Check const columns'; +DROP TABLE IF EXISTS sipHashKeyed_test; +CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; +SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), 1, 'test'); +SELECT sipHash64(tuple(*)) FROM sipHashKeyed_test; +SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), tuple(*)) FROM sipHashKeyed_test; +SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), a, b) FROM sipHashKeyed_test; +SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), 1, 'test')); +SELECT hex(sipHash128(tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; +DROP TABLE sipHashKeyed_test; + +SELECT 'Check multiple keys as tuple from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); +INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); +SELECT sipHash64Keyed(key, val) FROM sipHashKeyed_keys ORDER by key; +SELECT hex(sipHash128Keyed(key, val)) FROM sipHashKeyed_keys ORDER by key; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); +INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); +SELECT sipHash64Keyed((key0, key1), val) FROM sipHashKeyed_keys ORDER by key0; +SELECT hex(sipHash128Keyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; +SELECT 'Check constant key and data from a table'; +SELECT sipHash64Keyed((2::UInt64, 2::UInt64), val) FROM sipHashKeyed_keys ORDER by val; +SELECT hex(sipHash128Keyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table with constant data'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES (2, 2); +INSERT INTO sipHashKeyed_keys VALUES (4, 4); +SELECT sipHash64Keyed((key0, key1), 4::UInt64) FROM sipHashKeyed_keys ORDER by key0; +SELECT hex(sipHash128Keyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check asan bug'; +SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY sipHash128(1::UInt8), toUInt64(9223372036854775806); + +SELECT 'Check bug found fuzzing'; +SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } +SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); diff --git a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql index 197a8ad7221..a4e0965e329 100644 --- a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -11,7 +11,7 @@ drop table if exists bug_delta_gorilla; create table bug_delta_gorilla (value_bug UInt64 codec (Delta, Gorilla)) engine = MergeTree -order by tuple() +order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' as (select 0 from numbers(30000000)); select count(*) @@ -32,7 +32,7 @@ select 'The same issue in a much smaller repro happens also in Debug builds'; create table bug_delta_gorilla (val UInt64 codec (Delta, Gorilla)) engine = MergeTree -order by val; +order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into bug_delta_gorilla values (0)(1)(3); select * from bug_delta_gorilla; diff --git a/tests/queries/0_stateless/02552_siphash128_reference.reference b/tests/queries/0_stateless/02552_siphash128_reference.reference index d00491fd7e5..ece9f6a4615 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.reference +++ b/tests/queries/0_stateless/02552_siphash128_reference.reference @@ -1 +1,152 @@ +A3817F04BA25A8E66DF67214C7550293 +DA87C1D86B99AF44347659119B22FC45 +8177228DA4A45DC7FCA38BDEF60AFFE4 +9C70B60C5267A94E5F33B6B02985ED51 +F88164C12D9C8FAF7D0F6E7C7BCD5579 +1368875980776F8854527A07690E9627 +14EECA338B208613485EA0308FD7A15E +A1F1EBBED8DBC153C0B84AA61FF08239 +3B62A9BA6258F5610F83E264F31497B4 +264499060AD9BAABC47F8B02BB6D71ED +00110DC378146956C95447D3F3D0FBBA +0151C568386B6677A2B4DC6F81E5DC18 +D626B266905EF35882634DF68532C125 +9869E247E9C08B10D029934FC4B952F7 +31FCEFAC66D7DE9C7EC7485FE4494902 +5493E99933B0A8117E08EC0F97CFC3D9 +6EE2A4CA67B054BBFD3315BF85230577 +473D06E8738DB89854C066C47AE47740 +A426E5E423BF4885294DA481FEAEF723 +78017731CF65FAB074D5208952512EB1 +9E25FC833F2290733E9344A5E83839EB +568E495ABE525A218A2214CD3E071D12 +4A29B54552D16B9A469C10528EFF0AAE +C9D184DDD5A9F5E0CF8CE29A9ABF691C +2DB479AE78BD50D8882A8A178A6132AD +8ECE5F042D5E447B5051B9EACB8D8F6F +9C0B53B4B3C307E87EAEE08678141F66 +ABF248AF69A6EAE4BFD3EB2F129EEB94 +0664DA1668574B88B935F3027358AEF4 +AA4B9DC4BF337DE90CD4FD3C467C6AB7 +EA5C7F471FAF6BDE2B1AD7D4686D2287 +2939B0183223FAFC1723DE4F52C43D35 +7C3956CA5EEAFC3E363E9D556546EB68 +77C6077146F01C32B6B69D5F4EA9FFCF +37A6986CB8847EDF0925F0F1309B54DE +A705F0E69DA9A8F907241A2E923C8CC8 +3DC47D1F29C448461E9E76ED904F6711 +0D62BF01E6FC0E1A0D3C4751C5D3692B +8C03468BCA7C669EE4FD5E084BBEE7B5 +528A5BB93BAF2C9C4473CCE5D0D22BD9 +DF6A301E95C95DAD97AE0CC8C6913BD8 +801189902C857F39E73591285E70B6DB +E617346AC9C231BB3650AE34CCCA0C5B +27D93437EFB721AA401821DCEC5ADF89 +89237D9DED9C5E78D8B1C9B166CC7342 +4A6D8091BF5E7D651189FA94A250B14C +0E33F96055E7AE893FFC0E3DCF492902 +E61C432B720B19D18EC8D84BDC63151B +F7E5AEF549F782CF379055A608269B16 +438D030FD0B7A54FA837F2AD201A6403 +A590D3EE4FBF04E3247E0D27F286423F +5FE2C1A172FE93C4B15CD37CAEF9F538 +2C97325CBD06B36EB2133DD08B3A017C +92C814227A6BCA949FF0659F002AD39E +DCE850110BD8328CFBD50841D6911D87 +67F14984C7DA791248E32BB5922583DA +1938F2CF72D54EE97E94166FA91D2A36 +74481E9646ED49FE0F6224301604698E +57FCA5DE98A9D6D8006438D0583D8A1D +9FECDE1CEFDC1CBED4763674D9575359 +E3040C00EB28F15366CA73CBD872E740 +7697009A6A831DFECCA91C5993670F7A +5853542321F567A005D547A4F04759BD +5150D1772F50834A503E069A973FBD7C 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +Check bug with hashing of const integer values +E940B12600C844966162FF8FE7A16AAE +E940B12600C844966162FF8FE7A16AAE +Check memsan bug +1CE422FEE7BD8DE20000000000000000 +Check const columns +B66B53476BDBEB8549A257E3B1766C30 +B66B53476BDBEB8549A257E3B1766C30 +B66B53476BDBEB8549A257E3B1766C30 +B66B53476BDBEB8549A257E3B1766C30 +Check multiple keys as tuple from a table +E940B12600C844966162FF8FE7A16AAE +EC58946A98A0D37F4E3FAC02FBBA9480 +Check multiple keys as separate ints from a table +E940B12600C844966162FF8FE7A16AAE +EC58946A98A0D37F4E3FAC02FBBA9480 +Check constant key and data from a table +E940B12600C844966162FF8FE7A16AAE +E940B12600C844966162FF8FE7A16AAE +Check multiple keys as separate ints from a table with constant data +E940B12600C844966162FF8FE7A16AAE +EC58946A98A0D37F4E3FAC02FBBA9480 diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index 200954c3b57..f7324ed0ee4 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -1 +1,254 @@ +-- Test Vectors from the SipHash reference C implementation: +-- Written by +-- Jean-Philippe Aumasson +-- Daniel J. Bernstein +-- Released under CC0 +-- https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/vectors.h#L645 + +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + '')); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); +select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); + +-- CH tests +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128Reference(char(0)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128Reference(char(0, 1)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128Reference(char(0, 1, 2)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128Reference(char(0, 1, 2, 3)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128Reference(char(0, 1, 2, 3, 4)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + +select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } + SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; +SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; + +SELECT 'Check bug with hashing of const integer values'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO tab VALUES ((2, 2), 4); +-- these two statements must produce the same result +SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM tab; +SELECT hex(sipHash128ReferenceKeyed(key, 4::UInt64)) FROM tab; +DROP TABLE tab; + +SELECT 'Check memsan bug'; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1); + +SELECT 'Check const columns'; +DROP TABLE IF EXISTS sipHashKeyed_test; +CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), 1, 'test')); +SELECT hex(sipHash128Reference(tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; +SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; +DROP TABLE sipHashKeyed_test; + +SELECT 'Check multiple keys as tuple from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); +INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); +SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM sipHashKeyed_keys ORDER by key; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); +INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); +SELECT hex(sipHash128ReferenceKeyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; +SELECT 'Check constant key and data from a table'; +SELECT hex(sipHash128ReferenceKeyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; +DROP TABLE sipHashKeyed_keys; + +SELECT 'Check multiple keys as separate ints from a table with constant data'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; +INSERT INTO sipHashKeyed_keys VALUES (2, 2); +INSERT INTO sipHashKeyed_keys VALUES (4, 4); +SELECT hex(sipHash128ReferenceKeyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; +DROP TABLE sipHashKeyed_keys; diff --git a/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference new file mode 100644 index 00000000000..efc3cd99060 --- /dev/null +++ b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.reference @@ -0,0 +1,6 @@ +2 +SELECT countDistinctIf(number % 10, (number % 5) = 2) +FROM numbers(1000) +2 +SELECT uniqExactIf(number % 10, (number % 5) = 2) +FROM numbers(1000) diff --git a/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql new file mode 100644 index 00000000000..a81f53c1ef6 --- /dev/null +++ b/tests/queries/0_stateless/02554_rewrite_count_distinct_if_with_count_distinct_implementation.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); + +-- disable by default +SET rewrite_count_distinct_if_with_count_distinct_implementation = 1; +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000); diff --git a/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql index 9b117773b9b..93a47c6736a 100644 --- a/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql +++ b/tests/queries/0_stateless/02561_sorting_constants_and_distinct_crash.sql @@ -1,5 +1,5 @@ drop table if exists test_table; -CREATE TABLE test_table (string_value String) ENGINE = MergeTree ORDER BY string_value; +CREATE TABLE test_table (string_value String) ENGINE = MergeTree ORDER BY string_value SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; system stop merges test_table; insert into test_table select * from ( select 'test_value_1' diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.sh b/tests/queries/0_stateless/02561_temporary_table_grants.sh index 6e0c96786e8..33784f1d536 100755 --- a/tests/queries/0_stateless/02561_temporary_table_grants.sh +++ b/tests/queries/0_stateless/02561_temporary_table_grants.sh @@ -10,24 +10,24 @@ user=user_$CLICKHOUSE_TEST_UNIQUE_NAME $CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS $user" $CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE TEMPORARY TABLE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE ARBITRARY TEMPORARY TABLE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant FILE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant FILE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT FILE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant URL" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant URL" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO $user" diff --git a/tests/queries/0_stateless/02565_update_empty_nested.sql b/tests/queries/0_stateless/02565_update_empty_nested.sql index ca1c1f5f36e..33316847601 100644 --- a/tests/queries/0_stateless/02565_update_empty_nested.sql +++ b/tests/queries/0_stateless/02565_update_empty_nested.sql @@ -7,7 +7,7 @@ CREATE TABLE t_update_empty_nested ) ENGINE = MergeTree ORDER BY id -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192, index_granularity_bytes = '10Mi'; SET mutations_sync = 2; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql index 7b52a89b16f..eff9e0fa825 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS 02581_trips; -CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id; +CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- Make multiple parts INSERT INTO 02581_trips SELECT number, number, '' FROM numbers(10000); diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.reference b/tests/queries/0_stateless/02596_build_set_and_remote.reference index 8d12196ae33..b506eaf574f 100644 --- a/tests/queries/0_stateless/02596_build_set_and_remote.reference +++ b/tests/queries/0_stateless/02596_build_set_and_remote.reference @@ -4,16 +4,32 @@ SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); 1 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}'); +1 +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY NULL; +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 1; +1 SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; 1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 'A'; +1 SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; 1 +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}') GROUP BY dummy; +1 SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; 1000.0001 \N 0 1000.0001 257 0 1000.0001 65536 0 1000.0001 \N 0 +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}') GROUP BY NULL, NULL, NULL, NULL; +1000.0001 \N 0 +1000.0001 257 0 +1000.0001 65536 0 +1000.0001 \N 0 diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.sql b/tests/queries/0_stateless/02596_build_set_and_remote.sql index 7a904344c91..4785446c1ab 100644 --- a/tests/queries/0_stateless/02596_build_set_and_remote.sql +++ b/tests/queries/0_stateless/02596_build_set_and_remote.sql @@ -2,13 +2,19 @@ SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}'); SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY NULL; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 1; SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}') GROUP BY 'A'; SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}') GROUP BY dummy; SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}') GROUP BY NULL, NULL, NULL, NULL; diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib new file mode 100644 index 00000000000..88b2c82f704 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive.lib @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function read_archive_file() { + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')" + $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table" +} + +function run_archive_test() { + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table" + + FILE_PREFIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_$1_" + + user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + + echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv + echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv + echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv + + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" + + echo "archive1 data1.csv" + read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv" + echo "archive{1..2} data1.csv" + read_archive_file "${FILE_PREFIX}_archive{1..2}.$1 :: ${FILE_PREFIX}_data1.csv" + echo "archive{1,2} data{1,3}.csv" + read_archive_file "${FILE_PREFIX}_archive{1,2}.$1 :: ${FILE_PREFIX}_data{1,3}.csv" + echo "archive3 data*.csv" + read_archive_file "${FILE_PREFIX}_archive3.$1 :: ${FILE_PREFIX}_data*.csv" + echo "archive* *.csv" + read_archive_file "${FILE_PREFIX}_archive*.$1 :: *.csv" + echo "archive* {2..3}.csv" + read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv" + + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL" + + rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1 + + rm ${FILE_PREFIX}_data{1..3}.csv +} \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_7z.reference b/tests/queries/0_stateless/02661_read_from_archive_7z.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_7z.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_7z.sh b/tests/queries/0_stateless/02661_read_from_archive_7z.sh new file mode 100755 index 00000000000..62137c4bf77 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_7z.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "7z" "7z a" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tar.reference b/tests/queries/0_stateless/02661_read_from_archive_tar.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tar.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tar.sh b/tests/queries/0_stateless/02661_read_from_archive_tar.sh new file mode 100755 index 00000000000..7e7db389235 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tar.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar" "tar -cvf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh new file mode 100755 index 00000000000..4c3763629f4 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.bz2" "tar -cjf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_targz.reference b/tests/queries/0_stateless/02661_read_from_archive_targz.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_targz.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_targz.sh b/tests/queries/0_stateless/02661_read_from_archive_targz.sh new file mode 100755 index 00000000000..4505a150237 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_targz.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.gz" "tar -cvzf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh b/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh new file mode 100755 index 00000000000..b8ee5bc46d2 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tarxz.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tar.xz" "tar -cJf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_tzst.reference b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_tzst.sh b/tests/queries/0_stateless/02661_read_from_archive_tzst.sh new file mode 100755 index 00000000000..b4145e0d1d0 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_tzst.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "tzst" "tar -caf" \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_zip.reference b/tests/queries/0_stateless/02661_read_from_archive_zip.reference new file mode 100644 index 00000000000..27edb5536ad --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_zip.reference @@ -0,0 +1,116 @@ +archive1 data1.csv +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +archive{1..2} data1.csv +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +1 2 +1 2 +3 4 +3 4 +archive{1,2} data{1,3}.csv +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +1 2 +1 2 +3 4 +3 4 +9 10 +11 12 +archive3 data*.csv +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +5 6 +7 8 +9 10 +11 12 +archive* *.csv +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +1 2 +1 2 +3 4 +3 4 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +archive* {2..3}.csv +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +5 6 +5 6 +7 8 +7 8 +9 10 +9 10 +11 12 +11 12 +OK +OK diff --git a/tests/queries/0_stateless/02661_read_from_archive_zip.sh b/tests/queries/0_stateless/02661_read_from_archive_zip.sh new file mode 100755 index 00000000000..1234464f0a6 --- /dev/null +++ b/tests/queries/0_stateless/02661_read_from_archive_zip.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# shellcheck source=./02661_read_from_archive.lib +. "$CUR_DIR"/02661_read_from_archive.lib + +run_archive_test "zip" "zip" \ No newline at end of file diff --git a/tests/queries/0_stateless/02668_ulid_decoding.sql b/tests/queries/0_stateless/02668_ulid_decoding.sql index df94025b7b5..ecab5004df6 100644 --- a/tests/queries/0_stateless/02668_ulid_decoding.sql +++ b/tests/queries/0_stateless/02668_ulid_decoding.sql @@ -4,7 +4,7 @@ SELECT dateDiff('minute', ULIDStringToDateTime(generateULID()), now()) = 0; SELECT toTimezone(ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E'), 'America/Costa_Rica'); SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Rica'); SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9', 'America/Costa_Rica'); -- { serverError ILLEGAL_COLUMN } -SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Ric'); -- { serverError POCO_EXCEPTION } +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Ric'); -- { serverError BAD_ARGUMENTS } SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E0'); -- { serverError ILLEGAL_COLUMN } SELECT ULIDStringToDateTime(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT ULIDStringToDateTime(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02681_undrop_query.reference b/tests/queries/0_stateless/02681_undrop_query.reference deleted file mode 100644 index 5df6c392eb9..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query.reference +++ /dev/null @@ -1,32 +0,0 @@ -test MergeTree undrop -02681_undrop_mergetree -1 -2 -3 -test detach -UPDATE num = 2 WHERE id = 1 -test MergeTree with cluster -02681_undrop_uuid_on_cluster -1 -2 -3 -test MergeTree without uuid on cluster -02681_undrop_no_uuid_on_cluster -1 -2 -3 -test ReplicatedMergeTree undrop -02681_undrop_replicatedmergetree -1 -2 -3 -test Log undrop -02681_undrop_log -1 -2 -3 -test Distributed undrop -02681_undrop_distributed -test MergeTree drop and undrop multiple times -02681_undrop_multiple -3 diff --git a/tests/queries/0_stateless/02681_undrop_query.sql b/tests/queries/0_stateless/02681_undrop_query.sql deleted file mode 100644 index 39ca1548d53..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query.sql +++ /dev/null @@ -1,90 +0,0 @@ --- Tags: no-ordinary-database, no-replicated-database, distributed, zookeeper - -set database_atomic_wait_for_drop_and_detach_synchronously = 0; -set allow_experimental_undrop_table_query = 1; - -select 'test MergeTree undrop'; -drop table if exists 02681_undrop_mergetree sync; -create table 02681_undrop_mergetree (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_mergetree values (1),(2),(3); -drop table 02681_undrop_mergetree; -select table from system.dropped_tables where table = '02681_undrop_mergetree' limit 1; -undrop table 02681_undrop_mergetree; -select * from 02681_undrop_mergetree order by id; -drop table 02681_undrop_mergetree sync; - -select 'test detach'; -drop table if exists 02681_undrop_detach sync; -create table 02681_undrop_detach (id Int32, num Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_detach values (1, 1); -detach table 02681_undrop_detach; -undrop table 02681_undrop_detach; -- { serverError 57 } -attach table 02681_undrop_detach; -alter table 02681_undrop_detach update num = 2 where id = 1; -select command from system.mutations where table='02681_undrop_detach' and database=currentDatabase() limit 1; -drop table 02681_undrop_detach sync; - -select 'test MergeTree with cluster'; -drop table if exists 02681_undrop_uuid_on_cluster on cluster test_shard_localhost sync format Null; -create table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; -insert into 02681_undrop_uuid_on_cluster values (1),(2),(3); -drop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; -select table from system.dropped_tables where table = '02681_undrop_uuid_on_cluster' limit 1; -undrop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; -select * from 02681_undrop_uuid_on_cluster order by id; -drop table 02681_undrop_uuid_on_cluster sync; - -select 'test MergeTree without uuid on cluster'; -drop table if exists 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; -create table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; -insert into 02681_undrop_no_uuid_on_cluster values (1),(2),(3); -drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; -select table from system.dropped_tables where table = '02681_undrop_no_uuid_on_cluster' limit 1; -undrop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; -select * from 02681_undrop_no_uuid_on_cluster order by id; -drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; - -select 'test ReplicatedMergeTree undrop'; -drop table if exists 02681_undrop_replicatedmergetree sync; -create table 02681_undrop_replicatedmergetree (id Int32) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/02681_undrop_replicatedmergetree', 'test_undrop') order by id; -insert into 02681_undrop_replicatedmergetree values (1),(2),(3); -drop table 02681_undrop_replicatedmergetree; -select table from system.dropped_tables where table = '02681_undrop_replicatedmergetree' limit 1; -undrop table 02681_undrop_replicatedmergetree; -select * from 02681_undrop_replicatedmergetree order by id; -drop table 02681_undrop_replicatedmergetree sync; - -select 'test Log undrop'; -drop table if exists 02681_undrop_log sync; -create table 02681_undrop_log (id Int32) Engine=Log(); -insert into 02681_undrop_log values (1),(2),(3); -drop table 02681_undrop_log; -select table from system.dropped_tables where table = '02681_undrop_log' limit 1; -undrop table 02681_undrop_log; -select * from 02681_undrop_log order by id; -drop table 02681_undrop_log sync; - -select 'test Distributed undrop'; -drop table if exists 02681_undrop_distributed sync; -create table 02681_undrop_distributed (id Int32) Engine = Distributed(test_shard_localhost, currentDatabase(), 02681_undrop, rand()); -drop table 02681_undrop_distributed; -select table from system.dropped_tables where table = '02681_undrop_distributed' limit 1; -undrop table 02681_undrop_distributed; -drop table 02681_undrop_distributed sync; - -select 'test MergeTree drop and undrop multiple times'; -drop table if exists 02681_undrop_multiple sync; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (1); -drop table 02681_undrop_multiple; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (2); -drop table 02681_undrop_multiple; -create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; -insert into 02681_undrop_multiple values (3); -drop table 02681_undrop_multiple; -select table from system.dropped_tables where table = '02681_undrop_multiple' limit 1; -undrop table 02681_undrop_multiple; -select * from 02681_undrop_multiple order by id; -undrop table 02681_undrop_multiple; -- { serverError 57 } -drop table 02681_undrop_multiple sync; diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.reference b/tests/queries/0_stateless/02681_undrop_query_uuid.reference deleted file mode 100644 index beae016401b..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query_uuid.reference +++ /dev/null @@ -1,6 +0,0 @@ -test MergeTree with uuid -02681_undrop_uuid -OK -1 -2 -3 diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.sh b/tests/queries/0_stateless/02681_undrop_query_uuid.sh deleted file mode 100755 index a93f30ef459..00000000000 --- a/tests/queries/0_stateless/02681_undrop_query_uuid.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ordinary-database, no-replicated-database - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -echo 'test MergeTree with uuid' -${CLICKHOUSE_CLIENT} -q "drop table if exists 02681_undrop_uuid sync;" -uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") -uuid2=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "create table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id;" -${CLICKHOUSE_CLIENT} -q "insert into 02681_undrop_uuid values (1),(2),(3);" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "drop table 02681_undrop_uuid on cluster test_shard_localhost settings database_atomic_wait_for_drop_and_detach_synchronously = 0;" -${CLICKHOUSE_CLIENT} -q "select table from system.dropped_tables where table = '02681_undrop_uuid' limit 1;" -${CLICKHOUSE_CLIENT} -q "undrop table 02681_undrop_uuid UUID '$uuid2' settings allow_experimental_undrop_table_query = 1;" 2>&1| grep -Faq "UNKNOWN_TABLE" && echo OK -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "undrop table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost settings allow_experimental_undrop_table_query = 1;" -${CLICKHOUSE_CLIENT} -q "select * from 02681_undrop_uuid order by id;" -${CLICKHOUSE_CLIENT} -q "drop table 02681_undrop_uuid sync;" diff --git a/tests/queries/0_stateless/02714_local_object_storage.sql b/tests/queries/0_stateless/02714_local_object_storage.sql index fa9025b8b6e..914eac1ddba 100644 --- a/tests/queries/0_stateless/02714_local_object_storage.sql +++ b/tests/queries/0_stateless/02714_local_object_storage.sql @@ -7,7 +7,7 @@ CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk( type = 'local_blob_storage', - path = '/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/'); + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}/'); INSERT INTO test SELECT 1, 'test'; SELECT * FROM test; @@ -19,7 +19,7 @@ ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk( type = 'cache', max_size = '10Mi', - path = '/var/lib/clickhouse/caches/${CLICKHOUSE_TEST_UNIQUE_NAME}/', + path = '${CLICKHOUSE_TEST_UNIQUE_NAME}/', disk = disk(type='local_blob_storage', path='/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/')); INSERT INTO test SELECT 1, 'test'; diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 3b7a41bb39e..99f2191c991 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -40,32 +40,31 @@ ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# echo "Test 2: check DatabaseFilesystem access rights and errors handling on server" # DATABASE_ACCESS_DENIED: Allows list files only inside user_files -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ USE test1; SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; -""" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: path should be inside user_files ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exists ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # FILE_DOESNT_EXIST: unknown file -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 60" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "FILE_DOESNT_EXIST" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: - +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" rm -rd $tmp_dir diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index bb8f1f5f7ee..13b627c0342 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" +""" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index ef8adae2bbc..dfc5b63647d 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -4,9 +4,8 @@ test1 1 2 3 test2 Test 2: check exceptions -OK0 -OK1 -OK2 -OK3 -OK4 -OK5 +BAD_ARGUMENTS +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 89ff7421a6f..f6089cfa18a 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash # Tags: no-fasttest, use-hdfs, no-parallel +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -36,19 +38,20 @@ echo "Test 2: check exceptions" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK0" +""" 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" +""" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK3" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK4" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Cleanup diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.sh b/tests/queries/0_stateless/02725_parquet_preserve_order.sh index ac29ef3f361..94f2eaaa753 100755 --- a/tests/queries/0_stateless/02725_parquet_preserve_order.sh +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # It'll be read into two blocks. The first block will sleep 2x longer than the second. # So reordering is very likely if the order-preservation doesn't work. -$CLICKHOUSE_LOCAL -q "select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, function_sleep_max_microseconds_per_block = 6000000" +$CLICKHOUSE_LOCAL -q "select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, function_sleep_max_microseconds_per_block = 6000000, input_format_parquet_local_file_min_bytes_for_seek=0" -$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" -$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2, input_format_parquet_local_file_min_bytes_for_seek=0" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2, input_format_parquet_local_file_min_bytes_for_seek=0" diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.sh b/tests/queries/0_stateless/02725_start_stop_fetches.sh index 0ca687ae951..c9922455d94 100755 --- a/tests/queries/0_stateless/02725_start_stop_fetches.sh +++ b/tests/queries/0_stateless/02725_start_stop_fetches.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database +# Tags: race, zookeeper, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql b/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql deleted file mode 100644 index 244ba4e959a..00000000000 --- a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql +++ /dev/null @@ -1,2 +0,0 @@ -SET allow_experimental_query_cache = 0; -SET allow_experimental_query_cache = 1; diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference index c7d79392d85..a7ee82bc67f 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.reference +++ b/tests/queries/0_stateless/02735_parquet_encoder.reference @@ -43,6 +43,7 @@ ipv6 Nullable(FixedString(16)) [(2,0,NULL,'','[]')] 1 1 0 1 +5090915589685802007 16159458007063698496 16159458007063698496 BYTE_ARRAY String @@ -53,3 +54,10 @@ BYTE_ARRAY String never gonna give you up +ms Nullable(DateTime64(3, \'UTC\')) +us Nullable(DateTime64(6, \'UTC\')) +ns Nullable(DateTime64(9, \'UTC\')) +cs Nullable(DateTime64(3, \'UTC\')) +s Nullable(DateTime64(3, \'UTC\')) +dus Nullable(DateTime64(9, \'UTC\')) +12670676506515577395 diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index 3701c685120..19125abf8da 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -147,6 +147,8 @@ insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaa select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='none'; select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); +insert into function file(compressed_02735.parquet) select if(number%3==1, NULL, 42) as x from numbers(70) settings output_format_parquet_compression_method='zstd'; +select sum(cityHash64(*)) from file(compressed_02735.parquet); -- Single-threaded encoding and Arrow encoder. drop table if exists other_encoders_02735; @@ -166,3 +168,15 @@ select columns.5, columns.6 from file(strings3_02735.parquet, ParquetMetadata) a select * from file(strings1_02735.parquet); select * from file(strings2_02735.parquet); select * from file(strings3_02735.parquet); + +-- DateTime64 with different units. +insert into function file(datetime64_02735.parquet) select + toDateTime64(number / 1e3, 3) as ms, + toDateTime64(number / 1e6, 6) as us, + toDateTime64(number / 1e9, 9) as ns, + toDateTime64(number / 1e2, 2) as cs, + toDateTime64(number, 0) as s, + toDateTime64(number / 1e7, 7) as dus + from numbers(2000); +desc file(datetime64_02735.parquet); +select sum(cityHash64(*)) from file(datetime64_02735.parquet); diff --git a/tests/queries/0_stateless/02751_match_constant_needle.reference b/tests/queries/0_stateless/02751_match_constant_needle.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02751_match_constant_needle.reference +++ b/tests/queries/0_stateless/02751_match_constant_needle.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02751_match_constant_needle.sql b/tests/queries/0_stateless/02751_match_constant_needle.sql index 71bdcc7cb0a..9980c3760f3 100644 --- a/tests/queries/0_stateless/02751_match_constant_needle.sql +++ b/tests/queries/0_stateless/02751_match_constant_needle.sql @@ -1 +1,2 @@ select match('default/k8s1', '\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z'); +select match('abc123', '[a-zA-Z]+(?P\\d+)'); diff --git a/tests/queries/0_stateless/02752_forbidden_headers.sql b/tests/queries/0_stateless/02752_forbidden_headers.sql index d966fc0a187..dd0dcb53b16 100644 --- a/tests/queries/0_stateless/02752_forbidden_headers.sql +++ b/tests/queries/0_stateless/02752_forbidden_headers.sql @@ -3,16 +3,24 @@ SELECT * FROM url('http://localhost:8123/', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM url('http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM url('http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('bad_header_name: test\nexact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('bad_header_value' = 'test\nexact_header: value')); -- { serverError BAD_ARGUMENTS } SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } diff --git a/tests/queries/0_stateless/02766_prql.sh b/tests/queries/0_stateless/02766_prql.sh index f8bbd72af4e..85b1167027c 100755 --- a/tests/queries/0_stateless/02766_prql.sh +++ b/tests/queries/0_stateless/02766_prql.sh @@ -21,18 +21,18 @@ INSERT INTO aboba (user_id, message, creation_date, metric) VALUES (101, 'Hello, SET dialect = 'prql'; from aboba -derive [ +derive { a = 2, b = s\"LEFT(message, 2)\" -] -select [ user_id, message, a, b ]; +} +select { user_id, message, a, b }; from aboba filter user_id > 101 group user_id ( - aggregate [ + aggregate { metrics = sum metric - ] + } ); SET dialect = 'clickhouse'; @@ -49,10 +49,10 @@ SELECT '---'; SET dialect = 'prql'; from aboba -select [ user_id, message, metric ] +select { user_id, message, metric } derive creation_date = s\"toTimeZone(creation_date, 'Europe/Amsterdam')\" -select [ user_id, message, creation_date, metric]; +select { user_id, message, creation_date, metric}; from s\"SELECT * FROM system.users\" | select non_existent_column; # {serverError UNKNOWN_IDENTIFIER} from non_existent_table; # {serverError UNKNOWN_TABLE} -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference index 872a5dd1d7d..5c3cab9bf4a 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference @@ -1,5 +1,7 @@ 2021-12-31 23:00:00 0 2021-12-31 23:00:00 0 +2021-12-31 23:00:00 0 +2021-12-31 23:00:00 0 Date 2 3 @@ -13,6 +15,18 @@ Date 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 DateTime 2 3 @@ -26,6 +40,18 @@ DateTime 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 Date32 2 3 @@ -39,6 +65,18 @@ Date32 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 DateTime64 2 3 @@ -52,3 +90,15 @@ DateTime64 4 1 4 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql index 0a2fa6cc93b..4da8cebff1c 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql @@ -11,6 +11,8 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0); SELECT * FROM source WHERE toYYYYMM(ts) = 202112; SELECT * FROM source WHERE toYear(ts) = 2021; +SELECT * FROM source WHERE toYYYYMM(ts) = 202112 SETTINGS allow_experimental_analyzer=1; +SELECT * FROM source WHERE toYear(ts) = 2021 SETTINGS allow_experimental_analyzer=1; DROP TABLE IF EXISTS source; CREATE TABLE source @@ -44,6 +46,18 @@ SELECT count(*) FROM source WHERE toYear(dt) < 2023; SELECT count(*) FROM source WHERE toYear(dt) <= 2023; SELECT count(*) FROM source WHERE toYear(dt) > 2023; SELECT count(*) FROM source WHERE toYear(dt) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'DateTime'; SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312; @@ -58,6 +72,18 @@ SELECT count(*) FROM source WHERE toYear(ts) < 2023; SELECT count(*) FROM source WHERE toYear(ts) <= 2023; SELECT count(*) FROM source WHERE toYear(ts) > 2023; SELECT count(*) FROM source WHERE toYear(ts) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'Date32'; SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312; @@ -72,6 +98,18 @@ SELECT count(*) FROM source WHERE toYear(dt_32) < 2023; SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023; SELECT count(*) FROM source WHERE toYear(dt_32) > 2023; SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023 SETTINGS allow_experimental_analyzer=1; SELECT 'DateTime64'; SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312; @@ -86,4 +124,16 @@ SELECT count(*) FROM source WHERE toYear(ts_64) < 2023; SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023; SELECT count(*) FROM source WHERE toYear(ts_64) > 2023; SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) = 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) < 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) > 2023 SETTINGS allow_experimental_analyzer=1; +SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023 SETTINGS allow_experimental_analyzer=1; DROP TABLE source; diff --git a/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql b/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql index 6bf287a3d77..2aa23e2b70d 100644 --- a/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql +++ b/tests/queries/0_stateless/02784_projections_read_in_order_bug.sql @@ -1,3 +1,5 @@ +DROP TABLE IF EXISTS events; + create table events ( `organisation_id` UUID, `session_id` UUID, diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference new file mode 100644 index 00000000000..0fd2f694aeb --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference @@ -0,0 +1,1122 @@ +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1998-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 18, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 22, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 23, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 24, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 26, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 27, constant_value: \'1995-01-01\', constant_value_type: String + FUNCTION id: 28, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 29, nodes: 2 + FUNCTION id: 30, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 31, nodes: 2 + COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 33, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 34, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 35, nodes: 2 + COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 36, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT + value1, + toYear(date1) AS year1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + year1 UInt16 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + FUNCTION id: 4, function_name: toYear, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 7, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + FUNCTION id: 9, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + FUNCTION id: 11, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 14, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 15, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 18, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 19, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + FUNCTION id: 21, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 25, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 26, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 27, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + PREWHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 13, constant_value: \'1994-01-01\', constant_value_type: String + WHERE + FUNCTION id: 14, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 20, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_3, constant_value_type: UInt8 + HAVING + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 18, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 19, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 21, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 22, constant_value: \'1994-01-01\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: toYYYYMM, function_type: ordinary, result_type: UInt32 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_199300, constant_value_type: UInt32 + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 17, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: toYYYYMM, function_type: ordinary, result_type: UInt32 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_199313, constant_value_type: UInt32 + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 17, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 16, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 20, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-04-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 9, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 10, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 16, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 14, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 11, constant_value: \'1992-03-01\', constant_value_type: String + FUNCTION id: 12, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 17, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 22, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 27, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 29, nodes: 2 + COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 30, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date32_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.date32_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime64_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 11, constant_value: \'1993-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +QUERY id: 0 + PROJECTION COLUMNS + value1 String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value1, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.datetime64_t + WHERE + FUNCTION id: 4, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + FUNCTION id: 8, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 11, constant_value: \'1993-12-01 00:00:00\', constant_value_type: String + FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3 + CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String + FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 + CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql new file mode 100644 index 00000000000..9cc8dd74e5d --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.sql @@ -0,0 +1,75 @@ +DROP TABLE IF EXISTS date_t; +CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE date_t; + +DROP TABLE IF EXISTS datetime_t; +CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE datetime_t; + +DROP TABLE IF EXISTS date32_t; +CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE date32_t; + +DROP TABLE IF EXISTS datetime64_t; +CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN QUERY TREE run_passes=1 SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3 SETTINGS allow_experimental_analyzer=1; +DROP TABLE datetime64_t; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference deleted file mode 100644 index 9235e7e106a..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference +++ /dev/null @@ -1,87 +0,0 @@ -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT - value1, - toYear(date1) AS year1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime64_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime64_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql deleted file mode 100644 index 266be59b0a3..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql +++ /dev/null @@ -1,47 +0,0 @@ -DROP TABLE IF EXISTS date_t; -CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; -DROP TABLE date_t; - -DROP TABLE IF EXISTS datetime_t; -CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime_t; - -DROP TABLE IF EXISTS date32_t; -CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE date32_t; - -DROP TABLE IF EXISTS datetime64_t; -CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime64_t; diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference index e202a38f068..aa18817f4e6 100644 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference @@ -36,6 +36,14 @@ exception_code: 0 system.query_views_log +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 4 + system.query_log Row 1: ────── @@ -85,6 +93,14 @@ written_bytes: 0 status: QueryFinish exception_code: 0 +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 3 + system.query_log Row 1: ────── @@ -133,3 +149,11 @@ written_rows: 0 written_bytes: 0 status: Exc*****onWhileProcessing exception_code: 395 + +system.part_log +Row 1: +────── +database: default +table: async_insert_landing +partition_id: all +rows: 3 diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh index d1c8fe122cb..1ff97031acb 100755 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -28,7 +28,7 @@ function print_flush_query_logs() FROM system.query_log WHERE event_date >= yesterday() - AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values ORDER BY type DESC FORMAT Vertical" @@ -50,7 +50,21 @@ function print_flush_query_logs() FROM system.query_views_log WHERE event_date >= yesterday() - AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') + FORMAT Vertical" + + echo "" + echo "system.part_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + database, + table, + partition_id, + rows + FROM system.part_log + WHERE + event_date >= yesterday() + AND query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') FORMAT Vertical" } diff --git a/tests/queries/0_stateless/01306_benchmark_json.reference b/tests/queries/0_stateless/02790_client_max_opening_fd.reference similarity index 100% rename from tests/queries/0_stateless/01306_benchmark_json.reference rename to tests/queries/0_stateless/02790_client_max_opening_fd.reference diff --git a/tests/queries/0_stateless/02790_client_max_opening_fd.sh b/tests/queries/0_stateless/02790_client_max_opening_fd.sh new file mode 100755 index 00000000000..ecc05d32050 --- /dev/null +++ b/tests/queries/0_stateless/02790_client_max_opening_fd.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Ensure that clickhouse-client does not open a large number of files. +ulimit -n 1024 +${CLICKHOUSE_CLIENT} --query "SELECT 1" diff --git a/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql b/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql index 9d9d7a3abd5..cb26a6bce4f 100644 --- a/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql +++ b/tests/queries/0_stateless/02796_projection_date_filter_on_view.sql @@ -13,7 +13,7 @@ CREATE TABLE fx_1m ( ) ENGINE = MergeTree() PARTITION BY toYear(dt_close) -ORDER BY (symbol, dt_close); +ORDER BY (symbol, dt_close) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -- add projection ALTER TABLE fx_1m diff --git a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.reference b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql new file mode 100644 index 00000000000..e204d968382 --- /dev/null +++ b/tests/queries/0_stateless/02797_aggregator_huge_mem_usage_bug.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS v; + +create view v (s LowCardinality(String), n UInt8) as select 'test' as s, toUInt8(number) as n from numbers(10000000); + +-- this is what allows mem usage to go really high +set max_block_size=10000000000; + +set max_memory_usage = '1Gi'; + +select s, sum(n) from v group by s format Null; + +DROP TABLE v; diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference new file mode 100644 index 00000000000..45ea5a7a29f --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.reference @@ -0,0 +1,4 @@ +2 (1,2) 3 +2 (1,2) 3 +0 (0,0) 0 +42 (42,42) 42 diff --git a/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh new file mode 100755 index 00000000000..767acf68553 --- /dev/null +++ b/tests/queries/0_stateless/02797_read_subcolumns_from_files.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format TSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', TSV, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + +$CLICKHOUSE_LOCAL -q "select ((1, 2), 3)::Tuple(b Tuple(c UInt32, d UInt32), e UInt32) as a format JSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select a.b.d, a.b, a.e from file('$DATA_FILE', JSONEachRow, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" +$CLICKHOUSE_LOCAL -q "select x.b.d, x.b, x.e from file('$DATA_FILE', JSONEachRow, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh index 31a7cc3b410..b8ee97a7c7d 100755 --- a/tests/queries/0_stateless/02801_backup_native_copy.sh +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -29,14 +29,14 @@ $CLICKHOUSE_CLIENT -nm -q " " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' diff --git a/tests/queries/0_stateless/02803_backup_tmp_files.reference b/tests/queries/0_stateless/02803_backup_tmp_files.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02803_backup_tmp_files.sh b/tests/queries/0_stateless/02803_backup_tmp_files.sh new file mode 100755 index 00000000000..d86beae4923 --- /dev/null +++ b/tests/queries/0_stateless/02803_backup_tmp_files.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag: no-fasttest - requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_disk'; + insert into data select * from numbers(10); +" + +$CLICKHOUSE_CLIENT --format Null -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data')" +$CLICKHOUSE_CLIENT --format Null -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data')" diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index a43cd6deb9e..333bc1bc25d 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_s3_disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = 's3_disk', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ 2>&1 | grep -q "Disk with name \`s3_disk\` already exist" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" @@ -25,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference new file mode 100644 index 00000000000..d2c8c83d997 --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference @@ -0,0 +1,5 @@ +OK +1 +0 +1 +0 diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh new file mode 100755 index 00000000000..b44f9e50513 --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +# set -x + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); + +INSERT INTO test SELECT 1, 'test'; +""" + +query_id=$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY kek; +""" 2>&1 | grep -q "Invalid cache key hex: kek" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +offset=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +query_id=$RANDOM$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" diff --git a/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference b/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference new file mode 100644 index 00000000000..f569df13dc1 --- /dev/null +++ b/tests/queries/0_stateless/02812_csv_date_time_with_comma.reference @@ -0,0 +1,2 @@ +2000-01-01 00:00:00 abc +2000-01-01 00:00:00.000 abc diff --git a/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql b/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql new file mode 100644 index 00000000000..ecd3cff6ad0 --- /dev/null +++ b/tests/queries/0_stateless/02812_csv_date_time_with_comma.sql @@ -0,0 +1,3 @@ +select * from format(CSV, 'c1 DateTime, c2 String', '01-01-2000,abc') settings date_time_input_format='best_effort'; +select * from format(CSV, 'c1 DateTime64(3), c2 String', '01-01-2000,abc') settings date_time_input_format='best_effort'; + diff --git a/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference b/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference new file mode 100644 index 00000000000..91c52ebb7c3 --- /dev/null +++ b/tests/queries/0_stateless/02812_from_to_utc_timestamp.reference @@ -0,0 +1,3 @@ +1 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-15 16:00:00 2023-03-16 19:22:33.000 +2 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-16 03:22:33 2023-03-16 08:00:00.000 +3 2023-03-16 12:22:33 2023-03-16 10:22:33.000 2023-03-16 03:22:33 2023-03-16 19:22:33.123 diff --git a/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh b/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh new file mode 100755 index 00000000000..59a6399ee2f --- /dev/null +++ b/tests/queries/0_stateless/02812_from_to_utc_timestamp.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (x UInt32, y DateTime, z DateTime64) engine=MergeTree order by x" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(1, '2023-03-16', '2023-03-16 11:22:33')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(2, '2023-03-16 11:22:33', '2023-03-16')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl values(3, '2023-03-16 11:22:33', '2023-03-16 11:22:33.123456')" +$CLICKHOUSE_CLIENT -q "select x, to_utc_timestamp(toDateTime('2023-03-16 11:22:33'), 'Etc/GMT+1'), from_utc_timestamp(toDateTime64('2023-03-16 11:22:33', 3), 'Etc/GMT+1'), to_utc_timestamp(y, 'Asia/Shanghai'), from_utc_timestamp(z, 'Asia/Shanghai') from test_tbl order by x" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/02812_pointwise_array_operations.reference b/tests/queries/0_stateless/02812_pointwise_array_operations.reference new file mode 100644 index 00000000000..3b1b973fd3f --- /dev/null +++ b/tests/queries/0_stateless/02812_pointwise_array_operations.reference @@ -0,0 +1,23 @@ +[2,5] +[2,6] +[4.5,5,12,10.1] +[(11.1,5.4),(6,21)] +[[13,2],[3]] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[0,0,0] +[(NULL,100000000000000000000),(NULL,1048833)] +[2,2] +[2,3] +[2,4] +[2,5] +[2,6] +[11,1,-2] diff --git a/tests/queries/0_stateless/02812_pointwise_array_operations.sql b/tests/queries/0_stateless/02812_pointwise_array_operations.sql new file mode 100644 index 00000000000..e28c4bda347 --- /dev/null +++ b/tests/queries/0_stateless/02812_pointwise_array_operations.sql @@ -0,0 +1,18 @@ +SELECT (materialize([1,1]) + materialize([1,4])); +SELECT ([1,2] + [1,4]); +SELECT ([2.5, 1, 3, 10.1] + [2, 4, 9, 0]); +SELECT ([(1,3), (2,9)] + [(10.1, 2.4), (4,12)]); +SELECT ([[1,1],[2]]+[[12,1],[1]]); +SELECT ([1,2]+[1,number]) from numbers(5); +SELECT ([1,2::UInt64]+[1,number]) from numbers(5); +SELECT ([materialize(1),materialize(2),materialize(3)]-[1,2,3]); +SELECT [(NULL, 256), (NULL, 256)] + [(1., 100000000000000000000.), (NULL, 1048577)]; +SELECT ([1,2::UInt64]+[1,number]) from numbers(5); +CREATE TABLE my_table (values Array(Int32)) ENGINE = MergeTree() ORDER BY values; +INSERT INTO my_table (values) VALUES ([12, 3, 1]); +SELECT values - [1,2,3] FROM my_table WHERE arrayExists(x -> x > 5, values); +SELECT ([12,13] % [5,6]); -- { serverError 43 } +SELECT ([2,3,4]-[1,-2,10,29]); -- { serverError 190 } +CREATE TABLE a ( x Array(UInt64), y Array(UInt64)) ENGINE = Memory; +INSERT INTO a VALUES ([2,3],[4,5]),([1,2,3], [4,5]),([6,7],[8,9,10]); +SELECT x, y, x+y FROM a; -- { serverError 190 } diff --git a/tests/queries/0_stateless/02813_float_parsing.reference b/tests/queries/0_stateless/02813_float_parsing.reference new file mode 100644 index 00000000000..c83331e0138 --- /dev/null +++ b/tests/queries/0_stateless/02813_float_parsing.reference @@ -0,0 +1,2 @@ +1.7090999999999998 15008753.000000002 6.000000000000001e-9 6.000000000000002e-9 1.7091 15008752 5.9999996e-9 5.9999996e-9 +1.7091 15008753 6e-9 6.000000000000001e-9 1.7091 15008753 6e-9 6e-9 diff --git a/tests/queries/0_stateless/02813_float_parsing.sql b/tests/queries/0_stateless/02813_float_parsing.sql new file mode 100644 index 00000000000..ba57b87f191 --- /dev/null +++ b/tests/queries/0_stateless/02813_float_parsing.sql @@ -0,0 +1,21 @@ +SELECT + toFloat64('1.7091'), + toFloat64('1.5008753E7'), + toFloat64('6e-09'), + toFloat64('6.000000000000001e-9'), + toFloat32('1.7091'), + toFloat32('1.5008753E7'), + toFloat32('6e-09'), + toFloat32('6.000000000000001e-9') +SETTINGS precise_float_parsing = 0; + +SELECT + toFloat64('1.7091'), + toFloat64('1.5008753E7'), + toFloat64('6e-09'), + toFloat64('6.000000000000001e-9'), + toFloat32('1.7091'), + toFloat32('1.5008753E7'), + toFloat32('6e-09'), + toFloat32('6.000000000000001e-9') +SETTINGS precise_float_parsing = 1; diff --git a/tests/queries/0_stateless/02814_create_index_uniq_noop.reference b/tests/queries/0_stateless/02814_create_index_uniq_noop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02814_create_index_uniq_noop.sql b/tests/queries/0_stateless/02814_create_index_uniq_noop.sql new file mode 100644 index 00000000000..127b3cbdeb6 --- /dev/null +++ b/tests/queries/0_stateless/02814_create_index_uniq_noop.sql @@ -0,0 +1,3 @@ +SET allow_create_index_without_type=1; +SET create_index_ignore_unique=1; +CREATE UNIQUE INDEX idx_tab2_0 ON tab2 (col1); diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference new file mode 100644 index 00000000000..1e7d6b54cce --- /dev/null +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.reference @@ -0,0 +1,5 @@ +Aborted +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh new file mode 100755 index 00000000000..a5c6de3ce58 --- /dev/null +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION=1 + +# The environment variable works as expected: +bash -c " + abort_handler() + { + exit 0 + } + trap 'abort_handler' ABRT + $CLICKHOUSE_LOCAL --query 'this is wrong' +" 2>&1 | grep -o 'Aborted' + +# No exceptions are thrown in simple cases: +$CLICKHOUSE_LOCAL --query "SELECT 1" +$CLICKHOUSE_LOCAL --query "SHOW TABLES" +$CLICKHOUSE_LOCAL --query "SELECT * FROM system.tables WHERE database = currentDatabase() FORMAT Null" + +# The same for the client app: +$CLICKHOUSE_CLIENT --query "SELECT 1" +$CLICKHOUSE_CLIENT --query "SHOW TABLES" +$CLICKHOUSE_CLIENT --query "SELECT * FROM system.tables WHERE database = currentDatabase() FORMAT Null" + +# Multi queries are ok: +$CLICKHOUSE_LOCAL --multiquery "SELECT 1; SELECT 2;" + +# It can run in interactive mode: +function run() +{ + command=$1 + expect << EOF + +log_user 0 +set timeout 60 +match_max 100000 + +spawn bash -c "$command" + +expect ":) " + +send -- "SELECT 1\r" +expect "1" +expect ":) " + +send -- "exit\r" +expect eof + +EOF +} + +run "$CLICKHOUSE_LOCAL" diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference new file mode 100644 index 00000000000..1e3b82ac136 --- /dev/null +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference @@ -0,0 +1,27 @@ +Test 1: check double quotes +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 1a: check double quotes no parsing overflow +1 +Test 1b: check double quotes empty +1 +Test 2: check back quotes +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 2a: check back quotes no parsing overflow +1 +Test 2b: check back quotes empty +1 +Test 3: check literal +1 abc 123 abacaba +2 def 456 bacabaa +3 story 78912 acabaab +4 history 21321321 cabaaba +Test 3a: check literal no parsing overflow +1 +Test 3b: check literal empty +1 diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh new file mode 100755 index 00000000000..5cf3b1c88fd --- /dev/null +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +dir=${CLICKHOUSE_TEST_UNIQUE_NAME} +[[ -d $dir ]] && rm -rd $dir +mkdir $dir + +# Create temporary csv file for tests +echo '"id","str","int","text"' > $dir/tmp.csv +echo '1,"abc",123,"abacaba"' >> $dir/tmp.csv +echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv +echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv +echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv + +################# +echo "Test 1: check double quotes" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"${dir}/tmp.csv\"" +################# +echo "Test 1a: check double quotes no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"${dir}/tmp.csv\"\"bad\"" 2>&1 | grep -c "UNKNOWN_TABLE" +################# +echo "Test 1b: check double quotes empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \"\"" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 2: check back quotes" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`${dir}/tmp.csv\`" +################# +echo "Test 2a: check back quotes no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`${dir}/tmp.csv\`\`bad\`" 2>&1 | grep -c "UNKNOWN_TABLE" +################# +echo "Test 2b: check back quotes empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM \`\`" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 3: check literal" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM '${dir}/tmp.csv'" +################# +echo "Test 3a: check literal no parsing overflow" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM '${dir}/tmp.csv''bad'" 2>&1 | grep -c "SYNTAX_ERROR" +################# +echo "Test 3b: check literal empty" + +$CLICKHOUSE_LOCAL -q "SELECT * FROM ''" 2>&1 | grep -c "SYNTAX_ERROR" + +# Remove temporary dir with files +rm -rd $dir diff --git a/tests/queries/0_stateless/02816_has_token_empty.reference b/tests/queries/0_stateless/02816_has_token_empty.reference index aa47d0d46d4..8435d77c5fe 100644 --- a/tests/queries/0_stateless/02816_has_token_empty.reference +++ b/tests/queries/0_stateless/02816_has_token_empty.reference @@ -1,2 +1,6 @@ 0 +\N +\N 0 +\N +\N diff --git a/tests/queries/0_stateless/02816_has_token_empty.sql b/tests/queries/0_stateless/02816_has_token_empty.sql index e5d6156debd..3e00959126b 100644 --- a/tests/queries/0_stateless/02816_has_token_empty.sql +++ b/tests/queries/0_stateless/02816_has_token_empty.sql @@ -2,6 +2,10 @@ SELECT hasTokenCaseInsensitive('K(G', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('Hello', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('', ''); -- { serverError BAD_ARGUMENTS } SELECT hasTokenCaseInsensitive('', 'Hello'); +SELECT hasTokenCaseInsensitiveOrNull('Hello', ''); +SELECT hasTokenCaseInsensitiveOrNull('', ''); SELECT hasToken('Hello', ''); -- { serverError BAD_ARGUMENTS } SELECT hasToken('', 'Hello'); SELECT hasToken('', ''); -- { serverError BAD_ARGUMENTS } +SELECT hasTokenOrNull('', ''); +SELECT hasTokenOrNull('Hello', ''); diff --git a/tests/queries/0_stateless/02817_structure_to_schema.reference b/tests/queries/0_stateless/02817_structure_to_schema.reference new file mode 100644 index 00000000000..1f39a8ed50e --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.reference @@ -0,0 +1,466 @@ +CapnProto +Numbers + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + int128 @8 : Data; + uint128 @9 : Data; + int256 @10 : Data; + uint256 @11 : Data; + float32 @12 : Float32; + float64 @13 : Float64; + decimal32 @14 : Int32; + decimal64 @15 : Int64; + decimal128 @16 : Data; + decimal256 @17 : Data; +} +Dates + +struct Message +{ + data @0 : UInt16; + date32 @1 : Int32; + datetime @2 : UInt32; + datatime64 @3 : Int64; +} +Strings + +struct Message +{ + string @0 : Data; + fixedstring @1 : Data; +} +Special + +struct Message +{ + ipv4 @0 : UInt32; + ipv6 @1 : Data; + uuid @2 : Data; +} +Nullable + +struct Message +{ + struct Nullable + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + nullable @0 : Nullable; +} +Enums + +struct Message +{ + enum Enum8 + { + v1 @0; + v2 @1; + v3 @2; + v4 @3; + } + enum8 @0 : Enum8; + enum Enum16 + { + v5 @0; + v6 @1; + v7 @2; + v8 @3; + v9 @4; + } + enum16 @1 : Enum16; +} +Arrays + +struct Message +{ + arr1 @0 : List(UInt32); + arr2 @1 : List(List(List(UInt32))); +} +Tuples + +struct Message +{ + struct Tuple1 + { + e1 @0 : UInt32; + e2 @1 : Data; + e3 @2 : UInt32; + } + tuple1 @0 : Tuple1; + struct Tuple2 + { + struct E1 + { + e1 @0 : UInt32; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + e3 @2 : Data; + } + e1 @0 : E1; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + } + tuple2 @1 : Tuple2; +} +Maps + +struct Message +{ + struct Map1 + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + map1 @0 : Map1; + struct Map2 + { + struct Entry + { + struct Value + { + struct Entry + { + struct Value + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + map2 @1 : Map2; +} +Complex + +struct Message +{ + struct C1 + { + struct E1 + { + struct Entry + { + struct Value + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + key @0 : Data; + value @1 : List(List(Value)); + } + entries @0 : List(Entry); + } + e1 @0 : List(E1); + struct E2 + { + struct Entry + { + struct Value + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + struct E2 + { + e1 @0 : UInt32; + struct E2 + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + e2 @1 : UInt32; + } + e2 @1 : E2; + } + e2 @1 : List(E2); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + e2 @1 : List(E2); + } + c1 @0 : C1; +} +Read/write with no schema +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +Output schema + +struct Message +{ + number @0 : UInt64; +} +Bad output schema path +2 +2 +Protobuf +Numbers + +message Message +{ + int32 int8 = 1; + uint32 uint8 = 2; + int32 int16 = 3; + uint32 uint16 = 4; + int32 int32 = 5; + uint32 uint32 = 6; + int64 int64 = 7; + uint64 uint64 = 8; + bytes int128 = 9; + bytes uint128 = 10; + bytes int256 = 11; + bytes uint256 = 12; + float float32 = 13; + double float64 = 14; + bytes decimal32 = 15; + bytes decimal64 = 16; + bytes decimal128 = 17; + bytes decimal256 = 18; +} +Dates + +message Message +{ + uint32 data = 1; + int32 date32 = 2; + uint32 datetime = 3; + uint64 datatime64 = 4; +} +Strings + +message Message +{ + bytes string = 1; + bytes fixedstring = 2; +} +Special + +message Message +{ + uint32 ipv4 = 1; + bytes ipv6 = 2; + bytes uuid = 3; +} +Nullable + +message Message +{ + uint32 nullable = 1; +} +Enums + +message Message +{ + enum Enum8 + { + v1 = 0; + v2 = 1; + v3 = 2; + v4 = 3; + } + Enum8 enum8 = 1; + enum Enum16 + { + v5 = 0; + v6 = 1; + v7 = 2; + v8 = 3; + v9 = 4; + } + Enum16 enum16 = 2; +} +Arrays + +message Message +{ + repeated uint32 arr1 = 1; + message Arr2 + { + message Arr2 + { + repeated uint32 arr2 = 1; + } + repeated Arr2 arr2 = 1; + } + repeated Arr2 arr2 = 2; +} +Tuples + +message Message +{ + message Tuple1 + { + uint32 e1 = 1; + bytes e2 = 2; + uint32 e3 = 3; + } + Tuple1 tuple1 = 1; + message Tuple2 + { + message E1 + { + uint32 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + bytes e3 = 3; + } + E1 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + Tuple2 tuple2 = 2; +} +Maps + +message Message +{ + map map1 = 1; + message Map2Value + { + message Map2ValueValue + { + map map2ValueValue = 1; + } + map map2Value = 1; + } + map map2 = 2; +} +Complex + +message Message +{ + message C1 + { + message E1 + { + message E1Value + { + message E1Value + { + repeated uint32 e1Value = 1; + } + repeated E1Value e1Value = 1; + } + map e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + message E2Value + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + uint32 e1 = 1; + message E2 + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + repeated E2 e2 = 2; + } + map e2 = 1; + } + repeated E2 e2 = 2; + } + C1 c1 = 1; +} +Read/write with no schema +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +Output schema + +message Message +{ + uint64 number = 1; +} +Bad output schema path +2 +2 diff --git a/tests/queries/0_stateless/02817_structure_to_schema.sh b/tests/queries/0_stateless/02817_structure_to_schema.sh new file mode 100755 index 00000000000..3b79fa099a8 --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-data +SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema + +function test_structure() +{ + format=$1 + ext=$2 + structure=$3 + + $CLICKHOUSE_LOCAL -q "select structureTo${format}Schema('$structure') format TSVRaw" > $SCHEMA_FILE.$ext + tail -n +2 $SCHEMA_FILE.$ext + + $CLICKHOUSE_LOCAL -q "select * from generateRandom('$structure', 42) limit 10 format $format settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, '$structure') format Null settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" + +} + +function test_format() +{ + format=$1 + ext=$2 + + echo $format + + echo Numbers + numbers='int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, float32 Float32, float64 Float64, decimal32 Decimal32(3), decimal64 Decimal64(10), decimal128 Decimal128(20), decimal256 Decimal256(40)' + test_structure $format $ext "$numbers" + + echo Dates + dates='data Date, date32 Date32, datetime DateTime, datatime64 DateTime64(9)' + test_structure $format $ext "$dates" + + echo Strings + strings='string String, fixedstring FixedString(42)' + test_structure $format $ext "$strings" + + echo Special + special='ipv4 IPv4, ipv6 IPv6, uuid UUID' + test_structure $format $ext "$special" + + echo Nullable + nullable='nullable Nullable(UInt32)' + test_structure $format $ext "$nullable" + + echo Enums + enums="enum8 Enum8(''v1'' = -100, ''v2'' = -10, ''v3'' = 0, ''v4'' = 42), enum16 Enum16(''v5'' = -2000, ''v6'' = -1000, ''v7'' = 0, ''v8'' = 1000, ''v9'' = 2000)" + test_structure $format $ext "$enums" + + echo Arrays + arrays='arr1 Array(UInt32), arr2 Array(Array(Array(UInt32)))' + test_structure $format $ext "$arrays" + + echo Tuples + tuples='tuple1 Tuple(e1 UInt32, e2 String, e3 DateTime), tuple2 Tuple(e1 Tuple(e1 UInt32, e2 Tuple(e1 String, e2 DateTime), e3 String), e2 Tuple(e1 String, e2 UInt32))' + test_structure $format $ext "$tuples" + + echo Maps + maps='map1 Map(String, UInt32), map2 Map(String, Map(String, Map(String, UInt32)))' + test_structure $format $ext "$maps" + + echo Complex + complex='c1 Array(Tuple(e1 Map(String, Array(Array(Nullable(UInt32)))), e2 Map(String, Tuple(e1 Array(Array(Nullable(String))), e2 Nested(e1 UInt32, e2 Tuple(e1 Array(Array(Nullable(String))), e2 UInt32))))))' + test_structure $format $ext "$complex" + + echo "Read/write with no schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, 'number UInt64')" + + echo "Output schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format settings output_format_schema='$SCHEMA_FILE.$ext'" > $DATA_FILE + tail -n +2 $SCHEMA_FILE.$ext + + echo "Bad output schema path" + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='/tmp/schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='../../schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c +} + +test_format CapnProto capnp +test_format Protobuf proto + +rm $DATA_FILE +rm $SCHEMA_FILE* + diff --git a/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference new file mode 100644 index 00000000000..f14acdf9e6d --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.reference @@ -0,0 +1,6 @@ +2 +0 +3355402240 +3355402240 +3321851904 +3321851904 diff --git a/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql new file mode 100644 index 00000000000..399d0fba564 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_delete_indexes_projections.sql @@ -0,0 +1,26 @@ +set mutations_sync = 2; + +drop table if exists t_delete_skip_index; + +create table t_delete_skip_index (x UInt32, y String, index i y type minmax granularity 3) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into t_delete_skip_index select number, toString(number) from numbers(8192 * 10); + +select count() from t_delete_skip_index where y in (4, 5); +alter table t_delete_skip_index delete where x < 8192; +select count() from t_delete_skip_index where y in (4, 5); + +drop table if exists t_delete_skip_index; +drop table if exists t_delete_projection; + +create table t_delete_projection (x UInt32, y UInt64, projection p (select sum(y))) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into t_delete_projection select number, toString(number) from numbers(8192 * 10); + +select sum(y) from t_delete_projection settings optimize_use_projections = 0; +select sum(y) from t_delete_projection settings optimize_use_projections = 0, force_optimize_projection = 1; + +alter table t_delete_projection delete where x < 8192; + +select sum(y) from t_delete_projection settings optimize_use_projections = 0; +select sum(y) from t_delete_projection settings optimize_use_projections = 0, force_optimize_projection = 1; + +drop table if exists t_delete_projection; diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference new file mode 100644 index 00000000000..f80f8738ff8 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference @@ -0,0 +1,12 @@ +test_alter_profile case: max_session_count 1 alter_sessions_count 1 +test_alter_profile case: max_session_count 2 alter_sessions_count 1 +USER_SESSION_LIMIT_EXCEEDED +test_alter_profile case: max_session_count 1 alter_sessions_count 2 +test_alter_profile case: max_session_count 2 alter_sessions_count 2 +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh new file mode 100755 index 00000000000..546c54a4de9 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +PROFILE="02832_alter_max_sessions_profile_$$" +USER="02832_alter_max_sessions_user_$$" +USER2="02832_alter_max_sessions_user_two_$$" +ROLE="02832_alter_max_sessions_role_$$" + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" + +function test_alter_profile() +{ + local max_session_count="$1" + local alter_sessions_count="$2" + echo $"test_alter_profile case: max_session_count ${max_session_count} alter_sessions_count ${alter_sessions_count}" + + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" + + # Create sesssions with $max_session_count resriction + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # Skip output from this query + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null + done + + # Update resriction to $alter_sessions_count + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" + + # Simultaneous sessions should use max settings from profile ($alter_sessions_count) + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # ignore select 1, we need only errors + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + wait +} + +test_alter_profile 1 1 +test_alter_profile 2 1 +test_alter_profile 1 2 +test_alter_profile 2 2 + +${CLICKHOUSE_CLIENT} -q "SELECT 1 SETTINGS max_sessions_for_user = 1" 2>&1 | grep -m 1 -o 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"SET max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} --max_sessions_for_user=1 -q $"SELECT 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +# max_sessions_for_user is profile setting +${CLICKHOUSE_CLIENT} -q $"CREATE USER ${USER2} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"ALTER USER ${USER} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE}" +${CLICKHOUSE_CLIENT} -q $"ALTER ROLE ${ROLE} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER2}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"DROP ROLE IF EXISTS ${ROLE}" diff --git a/tests/queries/0_stateless/02833_array_join_columns.reference b/tests/queries/0_stateless/02833_array_join_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_array_join_columns.sql b/tests/queries/0_stateless/02833_array_join_columns.sql new file mode 100644 index 00000000000..3f9a33a3959 --- /dev/null +++ b/tests/queries/0_stateless/02833_array_join_columns.sql @@ -0,0 +1,19 @@ +drop table if exists test_array_joins; +drop table if exists v4test_array_joins; + +create table test_array_joins +( + id UInt64 default rowNumberInAllBlocks() + 1, + arr_1 Array(String), + arr_2 Array(String), + arr_3 Array(String), + arr_4 Array(String) +) engine = MergeTree order by id; + +insert into test_array_joins (id,arr_1, arr_2, arr_3, arr_4) +SELECT number,array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)) +from numbers(1000); + +create view v4test_array_joins as SELECT * from test_array_joins where id != 10; + +select * from v4test_array_joins array join columns('^arr') where match(arr_4,'a') and id < 100 order by id format Null settings optimize_read_in_order = 0; diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrrent_sessions.reference new file mode 100644 index 00000000000..bfe507e8eac --- /dev/null +++ b/tests/queries/0_stateless/02833_concurrrent_sessions.reference @@ -0,0 +1,34 @@ +sessions: +150 +port_0_sessions: +0 +address_0_sessions: +0 +tcp_sessions +60 +http_sessions +30 +http_with_session_id_sessions +30 +my_sql_sessions +30 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrrent_sessions.sh new file mode 100755 index 00000000000..c5b6204529b --- /dev/null +++ b/tests/queries/0_stateless/02833_concurrrent_sessions.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +# Each user uses a separate thread. +readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users +readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" ) +readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" ) +readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}") +readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" ) + +TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly TCP_USERS_SQL_COLLECTION_STRING +HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_USERS_SQL_COLLECTION_STRING +HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING +MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly MYSQL_USERS_SQL_COLLECTION_STRING +ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly ALL_USERS_SQL_COLLECTION_STRING + +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +for user in "${ALL_USERS[@]}"; do + ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}"; +done + +# All _session functions execute in separate threads. +# These functions try to create a session with successful login and logout. +# Sleep a small, random amount of time to make concurrency more intense. +# and try to login with an invalid password. +function tcp_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass" + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid' + done +} + +function http_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4" + done +} + +function http_with_session_id_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6" + done +} + +function mysql_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')" + + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)" + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; +export -f mysql_session; + +for user in "${TCP_USERS[@]}"; do + timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_USERS[@]}"; do + timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do + timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 & +done + +for user in "${MYSQL_USERS[@]}"; do + timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 & +done + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0" + +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')" + +echo "tcp_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'" +echo "http_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "http_with_session_id_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "my_sql_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'" + +for user in "${ALL_USERS[@]}"; do + ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}" + echo "Corresponding LoginSuccess/Logout" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')" + echo "LoginFailure" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'" + done diff --git a/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference b/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference new file mode 100644 index 00000000000..ca2a5bc50f8 --- /dev/null +++ b/tests/queries/0_stateless/02833_starts_ends_with_utf8.reference @@ -0,0 +1,29 @@ +-- { echoOn } +select startsWithUTF8('富强民主文明和谐', '富强'); +1 +select startsWithUTF8('富强民主文明和谐', '\xe5'); +0 +select startsWithUTF8('富强民主文明和谐', ''); +1 +SELECT startsWithUTF8('123', '123'); +1 +SELECT startsWithUTF8('123', '12'); +1 +SELECT startsWithUTF8('123', '1234'); +0 +SELECT startsWithUTF8('123', ''); +1 +select endsWithUTF8('富强民主文明和谐', '和谐'); +1 +select endsWithUTF8('富强民主文明和谐', '\x90'); +0 +select endsWithUTF8('富强民主文明和谐', ''); +1 +SELECT endsWithUTF8('123', '3'); +1 +SELECT endsWithUTF8('123', '23'); +1 +SELECT endsWithUTF8('123', '32'); +0 +SELECT endsWithUTF8('123', ''); +1 diff --git a/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql b/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql new file mode 100644 index 00000000000..3a783dc280e --- /dev/null +++ b/tests/queries/0_stateless/02833_starts_ends_with_utf8.sql @@ -0,0 +1,19 @@ +-- { echoOn } +select startsWithUTF8('富强民主文明和谐', '富强'); +select startsWithUTF8('富强民主文明和谐', '\xe5'); +select startsWithUTF8('富强民主文明和谐', ''); + +SELECT startsWithUTF8('123', '123'); +SELECT startsWithUTF8('123', '12'); +SELECT startsWithUTF8('123', '1234'); +SELECT startsWithUTF8('123', ''); + +select endsWithUTF8('富强民主文明和谐', '和谐'); +select endsWithUTF8('富强民主文明和谐', '\x90'); +select endsWithUTF8('富强民主文明和谐', ''); + +SELECT endsWithUTF8('123', '3'); +SELECT endsWithUTF8('123', '23'); +SELECT endsWithUTF8('123', '32'); +SELECT endsWithUTF8('123', ''); +-- { echoOff } diff --git a/tests/queries/0_stateless/02833_tuple_concat.reference b/tests/queries/0_stateless/02833_tuple_concat.reference new file mode 100644 index 00000000000..2c865f13ffc --- /dev/null +++ b/tests/queries/0_stateless/02833_tuple_concat.reference @@ -0,0 +1,6 @@ +(1,'y',2,'n') +(1,'y',2,'n',3,'n') +(1,2,3,'a','b','c','2020-10-08','2020-11-08') 1 2 3 a b c 2020-10-08 2020-11-08 +(1,2,1,2) 1 2 1 2 +(1,2,3,4) 1 2 3 4 +(3,4,1,2) 3 4 1 2 diff --git a/tests/queries/0_stateless/02833_tuple_concat.sql b/tests/queries/0_stateless/02833_tuple_concat.sql new file mode 100644 index 00000000000..df43e08d595 --- /dev/null +++ b/tests/queries/0_stateless/02833_tuple_concat.sql @@ -0,0 +1,23 @@ +SELECT tupleConcat(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleConcat((1, 'y'), 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT tupleConcat((1, 'y'), (2, 'n')); +SELECT tupleConcat((1, 'y'), (2, 'n'), (3, 'n')); + +WITH (1,2,3) || ('a','b','c') || ('2020-10-08'::Date, '2020-11-08'::Date) AS t +SELECT t, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8; + +DROP TABLE IF EXISTS t_02833; +CREATE TABLE t_02833 (tup Tuple(a UInt64, b UInt64)) ENGINE=Log; +INSERT INTO t_02833 VALUES ((1, 2)); + +WITH (tup || tup) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +WITH (tup || (3, 4)) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +WITH ((3, 4) || tup) AS res +SELECT res, res.1, res.2, res.3, res.4 FROM t_02833; + +DROP TABLE t_02833; diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.reference b/tests/queries/0_stateless/02833_url_without_path_encoding.reference new file mode 100644 index 00000000000..d5626230d71 --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.reference @@ -0,0 +1,2 @@ +4 +test%2Fa.tsv diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.sh b/tests/queries/0_stateless/02833_url_without_path_encoding.sh new file mode 100755 index 00000000000..b71586099cf --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=1" + +# Grep 'test%2Fa.tsv' to ensure that path wasn't encoded/decoded +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | grep -o "test%2Fa.tsv" -m1 + diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.reference b/tests/queries/0_stateless/02833_window_func_range_offset.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_window_func_range_offset.sql b/tests/queries/0_stateless/02833_window_func_range_offset.sql new file mode 100644 index 00000000000..f1d26c5cbaf --- /dev/null +++ b/tests/queries/0_stateless/02833_window_func_range_offset.sql @@ -0,0 +1,6 @@ +-- invalid start offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN 0.0 PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN nan PRECEDING AND UNBOUNDED FOLLOWING); -- { serverError BAD_ARGUMENTS } +-- invalid end offset with RANGE +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND 0.0 FOLLOWING); -- { serverError BAD_ARGUMENTS } +SELECT count() OVER (ORDER BY 3.4028234663852886e38 RANGE BETWEEN UNBOUNDED PRECEDING AND nan FOLLOWING); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02834_alter_exception.reference b/tests/queries/0_stateless/02834_alter_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02834_alter_exception.sql b/tests/queries/0_stateless/02834_alter_exception.sql new file mode 100644 index 00000000000..d42f40fcbf7 --- /dev/null +++ b/tests/queries/0_stateless/02834_alter_exception.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS alter_02834; +CREATE TABLE alter_02834 (a UInt64) ENGINE=MergeTree() ORDER BY a; +ALTER TABLE alter_02834 MODIFY QUERY SELECT a FROM alter_02834; -- { serverError NOT_IMPLEMENTED } +DROP TABLE alter_02834; diff --git a/tests/queries/0_stateless/02834_client_yaml_configs.reference b/tests/queries/0_stateless/02834_client_yaml_configs.reference new file mode 100644 index 00000000000..b2eddb19e52 --- /dev/null +++ b/tests/queries/0_stateless/02834_client_yaml_configs.reference @@ -0,0 +1,3 @@ +31337 +31338 +31339 diff --git a/tests/queries/0_stateless/02834_client_yaml_configs.sh b/tests/queries/0_stateless/02834_client_yaml_configs.sh new file mode 100755 index 00000000000..66d3df8829e --- /dev/null +++ b/tests/queries/0_stateless/02834_client_yaml_configs.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +pushd "${CLICKHOUSE_TMP}" > /dev/null || exit + +echo "max_block_size: 31337" > clickhouse-client.yaml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.yaml + +echo "max_block_size: 31338" > clickhouse-client.yml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.yml + +echo "31339" > clickhouse-client.xml +${CLICKHOUSE_CLIENT} --query "SELECT getSetting('max_block_size')" +rm clickhouse-client.xml + +popd > /dev/null || exit diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference new file mode 100644 index 00000000000..50173c150c0 --- /dev/null +++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference @@ -0,0 +1,76 @@ +CSV +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +TSV +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +JSONCompactEachRow +1 1 +2 0 +0 0 +3 3 +1 1 +2 0 +0 0 +3 3 +1 [1,2,3] +2 [] +0 [] +3 [3] +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 +CustomSeparated +1 1 +2 0 +0 0 +3 3 +1 1 \N \N +2 \N \N \N +\N \N \N \N +3 3 3 3 +1 1 +2 \N +\N \N +3 3 +1 0 +2 0 +0 0 +3 0 diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql new file mode 100644 index 00000000000..7c55cf2e9a7 --- /dev/null +++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql @@ -0,0 +1,24 @@ +select 'CSV'; +select * from format(CSV, 'x UInt32, y UInt32', '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSV, '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSVWithNames, '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select * from format(CSVWithNames, 'x UInt32, z UInt32', '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1; +select 'TSV'; +select * from format(TSV, 'x UInt32, y UInt32', '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSV, '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSVWithNames, 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select * from format(TSVWithNames, 'x UInt32, z UInt32', 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1; +select 'JSONCompactEachRow'; +select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1,[1,2,3]]\n[2]\n[]\n[3,3,3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, 'x UInt32, y Array(UInt32)', '[1,[1,2,3],1]\n[2]\n[]\n[3,[3],3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRow, '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRowWithNames, '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select * from format(JSONCompactEachRowWithNames, 'x UInt32, z UInt32', '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1; +select 'CustomSeparated'; +set format_custom_escaping_rule='CSV', format_custom_field_delimiter='', format_custom_row_before_delimiter='', format_custom_row_after_delimiter='', format_custom_row_between_delimiter='', format_custom_result_before_delimiter='', format_custom_result_after_delimiter=''; +select * from format(CustomSeparated, 'x UInt32, y UInt32', '1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparated, '1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparatedWithNames, '"x""y"1123333') settings input_format_custom_allow_variable_number_of_columns=1; +select * from format(CustomSeparatedWithNames, 'x UInt32, z UInt32', '"x""y"1123333') settings input_format_custom_allow_variable_number_of_columns=1; + diff --git a/tests/queries/0_stateless/02834_nulls_first_sort.reference b/tests/queries/0_stateless/02834_nulls_first_sort.reference new file mode 100644 index 00000000000..c16f69ac3c1 --- /dev/null +++ b/tests/queries/0_stateless/02834_nulls_first_sort.reference @@ -0,0 +1,5 @@ +5 \N 1 +5 \N 2 +5 \N 3 +5 \N 7 +5 1 1 diff --git a/tests/queries/0_stateless/02834_nulls_first_sort.sql b/tests/queries/0_stateless/02834_nulls_first_sort.sql new file mode 100644 index 00000000000..e17a49baf24 --- /dev/null +++ b/tests/queries/0_stateless/02834_nulls_first_sort.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS nulls_first_sort_test; +CREATE TABLE nulls_first_sort_test (a Nullable(Int32), b Nullable(Int32), c Nullable(Int32)) ENGINE = Memory; + +INSERT INTO nulls_first_sort_test VALUES (5,null,2), (5,null,1), (5,null,7), (5,null,3), (5,7,4), (5,7,6), (5,7,2), (5,7,1), (5,7,3), (5,7,9), (5,1,4), (5,1,6), (5,1,2), (5,1,1), (5,1,3), (5,1,9); + +SELECT * FROM nulls_first_sort_test ORDER BY a NULLS FIRST,b NULLS FIRST,c NULLS FIRST LIMIT 5; +DROP TABLE nulls_first_sort_test; diff --git a/tests/queries/0_stateless/02834_remote_session_log.reference b/tests/queries/0_stateless/02834_remote_session_log.reference new file mode 100644 index 00000000000..e2680982ab0 --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.reference @@ -0,0 +1,13 @@ +0 +0 +0 +0 +client_port 0 connections: +0 +client_address '::' connections: +0 +login failures: +0 +TCP Login and logout count is equal +HTTP Login and logout count is equal +MySQL Login and logout count is equal diff --git a/tests/queries/0_stateless/02834_remote_session_log.sh b/tests/queries/0_stateless/02834_remote_session_log.sh new file mode 100755 index 00000000000..3bedfb6c9ee --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ +readonly TEST_USER=$"02834_USER_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "client_port 0 connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0" + +echo "client_address '::' connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')" + +echo "login failures:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'" + +# remote(...) function sometimes reuses old cached sessions for query execution. +# This makes LoginSuccess/Logout entries count unstable, but success and logouts must always match. + +for interface in 'TCP' 'HTTP' 'MySQL' +do + LOGIN_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'"` + CORRESPONDING_LOGOUT_RECORDS_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')"` + + if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then + echo "${interface} Login and logout count is equal" + else + TOTAL_LOGOUT_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'"` + echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}" + fi +done + +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" diff --git a/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference new file mode 100644 index 00000000000..e2ed8f4daf2 --- /dev/null +++ b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.reference @@ -0,0 +1 @@ +65536 diff --git a/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql new file mode 100644 index 00000000000..32bd9694bd0 --- /dev/null +++ b/tests/queries/0_stateless/02834_sparse_columns_sort_with_limit.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t_sparse_sort_limit; + +CREATE TABLE t_sparse_sort_limit (date Date, i UInt64, v Int16) +ENGINE = MergeTree ORDER BY (date, i) +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_sort_limit SELECT '2020-10-10', number % 10, number FROM numbers(100000); +INSERT INTO t_sparse_sort_limit SELECT '2020-10-11', number % 10, number FROM numbers(100000); + +SELECT count() FROM (SELECT toStartOfMonth(date) AS d FROM t_sparse_sort_limit ORDER BY -i LIMIT 65536); + +DROP TABLE IF EXISTS t_sparse_sort_limit; diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.reference b/tests/queries/0_stateless/02835_drop_user_during_session.reference new file mode 100644 index 00000000000..7252faab8c6 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.reference @@ -0,0 +1,8 @@ +port_0_sessions: +0 +address_0_sessions: +0 +Corresponding LoginSuccess/Logout +9 +LoginFailure +0 diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh new file mode 100755 index 00000000000..347ebd22f96 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +# Tags: no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +readonly TEST_USER="02835_USER_${PID}" +readonly TEST_ROLE="02835_ROLE_${PID}" +readonly TEST_PROFILE="02835_PROFILE_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +function tcp_session() +{ + local user=$1 + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.numbers" --user="${user}" +} + +function http_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +function http_with_session_id_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +# Busy-waits until user $1, specified amount of queries ($2) will run simultaneously. +function wait_for_queries_start() +{ + local user=$1 + local queries_count=$2 + # 10 seconds waiting + counter=0 retries=100 + while [[ $counter -lt $retries ]]; do + result=$($CLICKHOUSE_CLIENT --query "SELECT COUNT(*) FROM system.processes WHERE user = '${user}'") + if [[ $result == "${queries_count}" ]]; then + break; + fi + sleep 0.1 + ((++counter)) + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +# DROP USE CASE +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP ROLE CASE +${CLICKHOUSE_CLIENT} -q "CREATE ROLE IF NOT EXISTS ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} DEFAULT ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP PROFILE CASE +${CLICKHOUSE_CLIENT} -q "CREATE SETTINGS PROFILE IF NOT EXISTS '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_port = 0" +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_address = toIPv6('::')" +echo "Corresponding LoginSuccess/Logout" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS}, FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout')" +echo "LoginFailure" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginFailure'" diff --git a/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference new file mode 100644 index 00000000000..5fda23e0114 --- /dev/null +++ b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.reference @@ -0,0 +1,3 @@ +\N + +\N diff --git a/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql new file mode 100644 index 00000000000..bdbc5594189 --- /dev/null +++ b/tests/queries/0_stateless/02835_fuzz_remove_redundant_sorting.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS numbers500k; +CREATE TABLE numbers500k (`number` UInt32) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO numbers500k SELECT number FROM system.numbers LIMIT 500000; +SELECT intDiv(number, NULL) AS k FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) PREWHERE 31 WHERE 65537 > 0 ORDER BY number DESC NULLS FIRST) GROUP BY GROUPING SETS ((k)) WITH TOTALS ORDER BY k ASC NULLS LAST LIMIT 2147483648; +DROP TABLE IF EXISTS numbers500k; diff --git a/tests/queries/0_stateless/02835_join_step_explain.reference b/tests/queries/0_stateless/02835_join_step_explain.reference new file mode 100644 index 00000000000..0cc2e802682 --- /dev/null +++ b/tests/queries/0_stateless/02835_join_step_explain.reference @@ -0,0 +1,116 @@ +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: id UInt64 + value_1 String + rhs.id UInt64 + rhs.value_1 String +Actions: INPUT : 0 -> id_0 UInt64 : 0 + INPUT : 1 -> value_1_1 String : 1 + INPUT : 2 -> value_1_3 String : 2 + INPUT : 3 -> id_2 UInt64 : 3 + ALIAS id_0 :: 0 -> id UInt64 : 4 + ALIAS value_1_1 :: 1 -> value_1 String : 0 + ALIAS value_1_3 :: 2 -> rhs.value_1 String : 1 + ALIAS id_2 :: 3 -> rhs.id UInt64 : 2 +Positions: 4 0 2 1 + Join (JOIN FillRightFirst) + Header: id_0 UInt64 + value_1_1 String + value_1_3 String + id_2 UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(id_0) = (id_2)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_0 UInt64 + value_1_1 String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + ALIAS id :: 0 -> id_0 UInt64 : 2 + ALIAS value_1 :: 1 -> value_1_1 String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value_1 String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_2 UInt64 + value_1_3 String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + ALIAS id :: 0 -> id_2 UInt64 : 2 + ALIAS value_1 :: 1 -> value_1_3 String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value_1 String + ReadType: Default + Parts: 1 + Granules: 1 +-- +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: id UInt64 + value_1 String + rhs.id UInt64 + rhs.value_1 String +Actions: INPUT : 0 -> id_0 UInt64 : 0 + INPUT : 1 -> value_1_1 String : 1 + INPUT :: 2 -> value_2_4 UInt64 : 2 + INPUT : 3 -> value_1_3 String : 3 + INPUT :: 4 -> value_2_5 UInt64 : 4 + INPUT : 5 -> id_2 UInt64 : 5 + ALIAS id_0 :: 0 -> id UInt64 : 6 + ALIAS value_1_1 :: 1 -> value_1 String : 0 + ALIAS value_1_3 :: 3 -> rhs.value_1 String : 1 + ALIAS id_2 :: 5 -> rhs.id UInt64 : 3 +Positions: 6 0 3 1 + Join (JOIN FillRightFirst) + Header: id_0 UInt64 + value_1_1 String + value_2_4 UInt64 + value_1_3 String + value_2_5 UInt64 + id_2 UInt64 + Type: INNER + Strictness: ASOF + Algorithm: HashJoin + ASOF inequality: LESS + Clauses: [(id_0, value_2_4) = (id_2, value_2_5)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_0 UInt64 + value_1_1 String + value_2_4 UInt64 + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + INPUT : 2 -> value_2 UInt64 : 2 + ALIAS id :: 0 -> id_0 UInt64 : 3 + ALIAS value_1 :: 1 -> value_1_1 String : 0 + ALIAS value_2 :: 2 -> value_2_4 UInt64 : 1 + Positions: 3 0 1 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value_1 String + value_2 UInt64 + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: id_2 UInt64 + value_1_3 String + value_2_5 UInt64 + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value_1 String : 1 + INPUT : 2 -> value_2 UInt64 : 2 + ALIAS id :: 0 -> id_2 UInt64 : 3 + ALIAS value_1 :: 1 -> value_1_3 String : 0 + ALIAS value_2 :: 2 -> value_2_5 UInt64 : 1 + Positions: 3 0 1 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value_1 String + value_2 UInt64 + ReadType: Default + Parts: 1 + Granules: 1 diff --git a/tests/queries/0_stateless/02835_join_step_explain.sql b/tests/queries/0_stateless/02835_join_step_explain.sql new file mode 100644 index 00000000000..d0475fa14b6 --- /dev/null +++ b/tests/queries/0_stateless/02835_join_step_explain.sql @@ -0,0 +1,31 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_1; +CREATE TABLE test_table_1 +( + id UInt64, + value_1 String, + value_2 UInt64 +) ENGINE=MergeTree ORDER BY id; + +DROP TABLE IF EXISTS test_table_2; +CREATE TABLE test_table_2 +( + id UInt64, + value_1 String, + value_2 UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table_1 VALUES (0, 'Value', 0); +INSERT INTO test_table_2 VALUES (0, 'Value', 0); + +EXPLAIN header = 1, actions = 1 SELECT lhs.id, lhs.value_1, rhs.id, rhs.value_1 +FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 SELECT lhs.id, lhs.value_1, rhs.id, rhs.value_1 +FROM test_table_1 AS lhs ASOF JOIN test_table_2 AS rhs ON lhs.id = rhs.id AND lhs.value_2 < rhs.value_2; + +DROP TABLE test_table_1; +DROP TABLE test_table_2; diff --git a/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference b/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference new file mode 100644 index 00000000000..c2936da0b4f --- /dev/null +++ b/tests/queries/0_stateless/02835_nested_array_lowcardinality.reference @@ -0,0 +1,30 @@ +[] [] +['0'] [''] +['0','1'] ['',''] +['0','1','2'] ['','',''] +['0','1','2','3'] ['','','',''] +['0','1','2','3','4'] ['','','','',''] +['0','1','2','3','4','5'] ['','','','','',''] +['0','1','2','3','4','5','6'] ['','','','','','',''] +['0','1','2','3','4','5','6','7'] ['','','','','','','',''] +['0','1','2','3','4','5','6','7','8'] ['','','','','','','','',''] +[] [] +[[]] [[]] +[[],['0']] [[],[]] +[[],['0'],['0','1']] [[],[],[]] +[[],['0'],['0','1'],['0','1','2']] [[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0']] [[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1']] [[],[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2']] [[],[],[],[],[],[],[],[]] +[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[],[],[],[],[]] +[] [] +[{}] [{}] +[{},{'k0':0}] [{},{}] +[{},{'k0':0},{'k0':0,'k1':1}] [{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2}] [{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{}] [{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0}] [{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1}] [{},{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2}] [{},{},{},{},{},{},{},{}] +[{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{},{'k0':0},{'k0':0,'k1':1},{'k0':0,'k1':1,'k2':2},{}] [{},{},{},{},{},{},{},{},{}] diff --git a/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql b/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql new file mode 100644 index 00000000000..36c1eb39cfd --- /dev/null +++ b/tests/queries/0_stateless/02835_nested_array_lowcardinality.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 LowCardinality(String)) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), range(number) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(LowCardinality(String)); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 Array(LowCardinality(String))) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), arrayMap(x -> range(x % 4), range(number)) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(Array(LowCardinality(String))); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; + +CREATE TABLE IF NOT EXISTS cool_table +( + id UInt64, + n Nested(n UInt64, lc1 Map(LowCardinality(String), UInt64)) +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO cool_table SELECT number, range(number), arrayMap(x -> (arrayMap(y -> 'k' || toString(y), range(x % 4)), range(x % 4))::Map(LowCardinality(String), UInt64), range(number)) FROM numbers(10); + +ALTER TABLE cool_table ADD COLUMN IF NOT EXISTS `n.lc2` Array(Map(LowCardinality(String), UInt64)); + +SELECT n.lc1, n.lc2 FROM cool_table ORDER BY id; + +DROP TABLE IF EXISTS cool_table; diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference new file mode 100644 index 00000000000..e6a24987c0d --- /dev/null +++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.reference @@ -0,0 +1,6 @@ +-- 1 shard, 3 replicas +100 0 99 49.5 +200 0 99 49.5 +-- 2 shards, 3 replicas each +200 0 99 49.5 +400 0 99 49.5 diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql new file mode 100644 index 00000000000..c2fc1d8355d --- /dev/null +++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql @@ -0,0 +1,47 @@ +-- 1 shard + +SELECT '-- 1 shard, 3 replicas'; +DROP TABLE IF EXISTS test_d; +DROP TABLE IF EXISTS test; +CREATE TABLE test (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id; + +CREATE TABLE IF NOT EXISTS test_d as test +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test); + +insert into test select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +insert into test select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +-- 2 shards + +SELECT '-- 2 shards, 3 replicas each'; +DROP TABLE IF EXISTS test2_d; +DROP TABLE IF EXISTS test2; +CREATE TABLE test2 (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id; + +CREATE TABLE IF NOT EXISTS test2_d as test2 +ENGINE = Distributed(test_cluster_two_shard_three_replicas_localhost, currentDatabase(), test2, id); + +insert into test2 select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test2_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; + +insert into test2 select *, today() from numbers(100); + +SELECT count(), min(id), max(id), avg(id) +FROM test2_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; diff --git a/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference new file mode 100644 index 00000000000..6829f972684 --- /dev/null +++ b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.reference @@ -0,0 +1,2 @@ +in file/uri +test.csv diff --git a/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh new file mode 100755 index 00000000000..d1b5ffa2af8 --- /dev/null +++ b/tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +FILENAME="${CLICKHOUSE_TMP}/test.csv" + +printf 'Bad\rHeader\n123\n' > "${FILENAME}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/t*e*s*t.csv')" 2>&1 | grep -o -P 'in file/uri|test\.csv' +rm "${FILENAME}" diff --git a/tests/queries/0_stateless/02841_join_filter_set_sparse.reference b/tests/queries/0_stateless/02841_join_filter_set_sparse.reference new file mode 100644 index 00000000000..1777e2e42f7 --- /dev/null +++ b/tests/queries/0_stateless/02841_join_filter_set_sparse.reference @@ -0,0 +1,2 @@ +3428033 +3428033 diff --git a/tests/queries/0_stateless/02841_join_filter_set_sparse.sql b/tests/queries/0_stateless/02841_join_filter_set_sparse.sql new file mode 100644 index 00000000000..e1a33998d4f --- /dev/null +++ b/tests/queries/0_stateless/02841_join_filter_set_sparse.sql @@ -0,0 +1,22 @@ + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (s String) ENGINE = MergeTree ORDER BY s +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO t1 SELECT if (number % 13 = 0, toString(number), '') FROM numbers(2000); + +CREATE TABLE t2 (s String) ENGINE = MergeTree ORDER BY s +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO t2 SELECT if (number % 14 = 0, toString(number), '') FROM numbers(2000); + +SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s; + +SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 100_000; + +SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/02841_local_assert.reference b/tests/queries/0_stateless/02841_local_assert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02841_local_assert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02841_local_assert.sh b/tests/queries/0_stateless/02841_local_assert.sh new file mode 100755 index 00000000000..a167c09da1f --- /dev/null +++ b/tests/queries/0_stateless/02841_local_assert.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -nm + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -nm --ignore-error + +echo "create table test (x UInt64) engine=Memory; +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS} +select 1" | $CLICKHOUSE_LOCAL -nm + diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.reference b/tests/queries/0_stateless/02841_not_ready_set_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh new file mode 100755 index 00000000000..fd7f62d28bf --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists t1;" +$CLICKHOUSE_CLIENT -q "create table t1 (number UInt64) engine = MergeTree order by tuple();" +$CLICKHOUSE_CLIENT -q "insert into t1 select number from numbers(10);" +$CLICKHOUSE_CLIENT --max_threads=2 --max_result_rows=1 --result_overflow_mode=break -q "with tab as (select min(number) from t1 prewhere number in (select number from view(select number, row_number() OVER (partition by number % 2 ORDER BY number DESC) from numbers_mt(1e4)) where number != 2 order by number)) select number from t1 union all select * from tab;" > /dev/null + diff --git a/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference new file mode 100644 index 00000000000..749fce669df --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.reference @@ -0,0 +1 @@ +1000000 diff --git a/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql new file mode 100644 index 00000000000..db15abb28cb --- /dev/null +++ b/tests/queries/0_stateless/02841_parallel_final_wrong_columns_order.sql @@ -0,0 +1,9 @@ +-- Tags: no-random-merge-tree-settings +-- Because we insert one million rows, it shouldn't choose too low index granularity. + +drop table if exists tab2; +create table tab2 (id String, version Int64, l String, accountCode String, z Int32) engine = ReplacingMergeTree(z) PRIMARY KEY (accountCode, id) ORDER BY (accountCode, id, version, l); +insert into tab2 select toString(number), number, toString(number), toString(number), 0 from numbers(1e6); +set max_threads=2; +select count() from tab2 final; +DROP TABLE tab2; diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference new file mode 100644 index 00000000000..4adf418bcc7 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference @@ -0,0 +1,73 @@ +number Nullable(UInt64) +u8 Nullable(UInt8) +u16 Nullable(UInt16) +u32 Nullable(UInt32) +u64 Nullable(UInt64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date32 Nullable(Date32) +dt64_ms Nullable(DateTime64(3, \'UTC\')) +dt64_us Nullable(DateTime64(6, \'UTC\')) +dt64_ns Nullable(DateTime64(9, \'UTC\')) +dt64_s Nullable(DateTime64(3, \'UTC\')) +dt64_cs Nullable(DateTime64(3, \'UTC\')) +f32 Nullable(Float32) +f64 Nullable(Float64) +s Nullable(String) +fs Nullable(FixedString(9)) +d32 Nullable(Decimal(9, 3)) +d64 Nullable(Decimal(18, 10)) +d128 Nullable(Decimal(38, 20)) +d256 Nullable(Decimal(76, 40)) +800 3959600 +1000 4999500 +1800 6479100 +500 2474750 +300 1604850 +500 2474750 +300 1604850 +500 2474750 +2100 5563950 +300 1184850 +400 1599800 +300 1184850 +500 2524750 +500 2524750 +300 1514850 +300 1514850 +300 1594850 +300 1594850 +200 999900 +200 999900 +200 999900 +200 999900 +0 \N +400 1709800 +0 \N +10000 49995000 +0 \N +200 909900 +10000 49995000 +0 \N +2 +500 244750 +500 244750 +300 104850 +300 104850 +200 179900 +200 179900 +200 179900 +200 179900 +200 19900 +200 19900 +600 259700 +600 259700 +500 244750 +500 244750 +0 \N +500 244750 +500 244750 +500 244750 +500 244750 diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql new file mode 100644 index 00000000000..8521ada04d5 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -0,0 +1,137 @@ +-- Tags: no-fasttest, no-parallel + +set output_format_parquet_row_group_size = 100; + +set input_format_null_as_default = 1; +set engine_file_truncate_on_insert = 1; +set optimize_or_like_chain = 0; +set max_block_size = 100000; +set max_insert_threads = 1; + +-- Analyzer breaks the queries with IN and some queries with BETWEEN. +-- TODO: Figure out why. +set allow_experimental_analyzer=0; + +-- Try all the types. +insert into function file('02841.parquet') + -- Use negative numbers to test sign extension for signed types and lack of sign extension for + -- unsigned types. + with 5000 - number as n select + + number, + + intDiv(n, 11)::UInt8 as u8, + n::UInt16 u16, + n::UInt32 as u32, + n::UInt64 as u64, + intDiv(n, 11)::Int8 as i8, + n::Int16 i16, + n::Int32 as i32, + n::Int64 as i64, + + toDate32(n*500000) as date32, + toDateTime64(n*1e6, 3) as dt64_ms, + toDateTime64(n*1e6, 6) as dt64_us, + toDateTime64(n*1e6, 9) as dt64_ns, + toDateTime64(n*1e6, 0) as dt64_s, + toDateTime64(n*1e6, 2) as dt64_cs, + + (n/1000)::Float32 as f32, + (n/1000)::Float64 as f64, + + n::String as s, + n::String::FixedString(9) as fs, + + n::Decimal32(3)/1234 as d32, + n::Decimal64(10)/12345678 as d64, + n::Decimal128(20)/123456789012345 as d128, + n::Decimal256(40)/123456789012345/678901234567890 as d256 + + from numbers(10000); + +desc file('02841.parquet'); + +-- To generate reference results, use a temporary table and GROUP BYs to simulate row group filtering: +-- create temporary table t as with [as above] select intDiv(number, 100) as group, [as above]; +-- then e.g. for a query that filters by `x BETWEEN a AND b`: +-- select sum(c), sum(h) from (select count() as c, sum(number) as h, min(x) as mn, max(x) as mx from t group by group) where a <= mx and b >= mn; + +-- Go over all types individually. +select count(), sum(number) from file('02841.parquet') where indexHint(u8 in (10, 15, 250)); +select count(), sum(number) from file('02841.parquet') where indexHint(i8 between -3 and 2); +select count(), sum(number) from file('02841.parquet') where indexHint(u16 between 4000 and 61000 or u16 == 42); +select count(), sum(number) from file('02841.parquet') where indexHint(i16 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(u32 in (42, 4294966296)); +select count(), sum(number) from file('02841.parquet') where indexHint(i32 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 in (42, 18446744073709550616)); +select count(), sum(number) from file('02841.parquet') where indexHint(i64 between -150 and 250); +select count(), sum(number) from file('02841.parquet') where indexHint(date32 between '1992-01-01' and '2023-08-02'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_ms between '2000-01-01' and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_ns between '2000-01-01' and '2005-01-01'); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_s between toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); +select count(), sum(number) from file('02841.parquet') where indexHint(dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); +select count(), sum(number) from file('02841.parquet') where indexHint(f32 between -0.11::Float32 and 0.06::Float32); +select count(), sum(number) from file('02841.parquet') where indexHint(f64 between -0.11 and 0.06); +select count(), sum(number) from file('02841.parquet') where indexHint(s between '-9' and '1!!!'); +select count(), sum(number) from file('02841.parquet') where indexHint(fs between '-9' and '1!!!'); +select count(), sum(number) from file('02841.parquet') where indexHint(d32 between '-0.011'::Decimal32(3) and 0.006::Decimal32(3)); +select count(), sum(number) from file('02841.parquet') where indexHint(d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); +select count(), sum(number) from file('02841.parquet') where indexHint(d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); +select count(), sum(number) from file('02841.parquet') where indexHint(d256 between '-0.00000000000000000000000000011'::Decimal256(40) and 0.00000000000000000000000000006::Decimal256(35)); + +-- Some random other cases. +select count(), sum(number) from file('02841.parquet') where indexHint(0); +select count(), sum(number) from file('02841.parquet') where indexHint(s like '99%' or u64 == 2000); +select count(), sum(number) from file('02841.parquet') where indexHint(s like 'z%'); +select count(), sum(number) from file('02841.parquet') where indexHint(u8 == 10 or 1 == 1); +select count(), sum(number) from file('02841.parquet') where indexHint(u8 < 0); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 + 1000000 == 1001000); +select count(), sum(number) from file('02841.parquet') where indexHint(u64 + 1000000 == 1001000) settings input_format_parquet_filter_push_down = 0; +select count(), sum(number) from file('02841.parquet') where indexHint(u32 + 1000000 == 999000); + +-- Very long string, which makes the Parquet encoder omit the corresponding min/max stat. +insert into function file('02841.parquet') + select arrayStringConcat(range(number*1000000)) as s from numbers(2); +select count() from file('02841.parquet') where indexHint(s > ''); + +-- Nullable and LowCardinality. +insert into function file('02841.parquet') select + number, + if(number%234 == 0, NULL, number) as sometimes_null, + toNullable(number) as never_null, + if(number%345 == 0, number::String, NULL) as mostly_null, + toLowCardinality(if(number%234 == 0, NULL, number)) as sometimes_null_lc, + toLowCardinality(toNullable(number)) as never_null_lc, + toLowCardinality(if(number%345 == 0, number::String, NULL)) as mostly_null_lc + from numbers(1000); + +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null is NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc is NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(mostly_null is not NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(mostly_null_lc is not NULL); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null_lc > 850); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null < 150); +select count(), sum(number) from file('02841.parquet') where indexHint(never_null_lc < 150); +-- Quirk with infinities: this reads too much because KeyCondition represents NULLs as infinities. +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null < 150); +select count(), sum(number) from file('02841.parquet') where indexHint(sometimes_null_lc < 150); + +-- Settings that affect the table schema or contents. +insert into function file('02841.parquet') select + number, + if(number%234 == 0, NULL, number + 100) as positive_or_null, + if(number%234 == 0, NULL, -number - 100) as negative_or_null, + if(number%234 == 0, NULL, 'I am a string') as string_or_null + from numbers(1000); + +select count(), sum(number) from file('02841.parquet') where indexHint(positive_or_null < 50); -- quirk with infinities +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, positive_or_null UInt64') where indexHint(positive_or_null < 50); +select count(), sum(number) from file('02841.parquet') where indexHint(negative_or_null > -50); +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null > -50); +select count(), sum(number) from file('02841.parquet') where indexHint(string_or_null == ''); -- quirk with infinities +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); +select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_parquet_case_insensitive_column_matching = 1; diff --git a/tests/queries/0_stateless/02841_remote_parameter_parsing_error.reference b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql new file mode 100644 index 00000000000..9e467a1f69a --- /dev/null +++ b/tests/queries/0_stateless/02841_remote_parameter_parsing_error.sql @@ -0,0 +1,14 @@ +-- Tags: shard + + +select * from remote('127.0.0.1', sys); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system.o); -- { serverError UNKNOWN_TABLE } +select * from remote('127.0.0.1', system.one, default); -- { serverError UNKNOWN_IDENTIFIER } +select * from remote('127.0.0.1', system.one, default, ''); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, default, key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', '', key1); -- { serverError UNKNOWN_IDENTIFIER } +select * from remote('127.0.0.1', system.one, default, '', key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', pwd, key1); -- { serverError BAD_ARGUMENTS } +select * from remote('127.0.0.1', system.one, 'default', '', key1, key2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select * from remote('127.0.0.1', system, one, 'default', '', key1, key2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02841_tuple_modulo.reference b/tests/queries/0_stateless/02841_tuple_modulo.reference new file mode 100644 index 00000000000..6e6f07d0683 --- /dev/null +++ b/tests/queries/0_stateless/02841_tuple_modulo.reference @@ -0,0 +1,4 @@ +(1,0) +(2,2) +(2,2) +(0,0) diff --git a/tests/queries/0_stateless/02841_tuple_modulo.sql b/tests/queries/0_stateless/02841_tuple_modulo.sql new file mode 100644 index 00000000000..56bacf87967 --- /dev/null +++ b/tests/queries/0_stateless/02841_tuple_modulo.sql @@ -0,0 +1,4 @@ +SELECT (5,4) % 2; +SELECT intDiv((5,4), 2); +SELECT intDivOrZero((5,4), 2); +SELECT intDivOrZero((5,4), 0); diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.reference b/tests/queries/0_stateless/02841_with_clause_resolve.reference new file mode 100644 index 00000000000..e2dfc4d85a9 --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.reference @@ -0,0 +1,15 @@ +2.5 +2.5 +2.5 +2.5 +2.5 +(9399,2858) +(7159,6972) +(7456,3524) +(12685,10191) +(12598,4979) +(9824,2699) +(5655,7793) +(14410,10296) +(16211,7662) +(9349,9053) diff --git a/tests/queries/0_stateless/02841_with_clause_resolve.sql b/tests/queries/0_stateless/02841_with_clause_resolve.sql new file mode 100644 index 00000000000..b416446461b --- /dev/null +++ b/tests/queries/0_stateless/02841_with_clause_resolve.sql @@ -0,0 +1,141 @@ +set allow_experimental_analyzer = 1; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Delay + , (time, wave, delay_, decay, count) -> arraySum(n1 -> wave(time - delay_ * n1), range(count)) AS delay + + , delay(time, (time -> 0.5), 0.2, 0.5, 5) AS kick + +SELECT + + kick + +FROM system.numbers +LIMIT 5; + +WITH + -- Input + 44100 AS sample_frequency + , number AS tick + , tick / sample_frequency AS time + + -- Output control + , 1 AS master_volume + , level -> least(1.0, greatest(-1.0, level)) AS clamp + , level -> (clamp(level) * 0x7FFF * master_volume)::Int16 AS output + , x -> (x, x) AS mono + + -- Basic waves + , time -> sin(time * 2 * pi()) AS sine_wave + , time -> time::UInt64 % 2 * 2 - 1 AS square_wave + , time -> (time - floor(time)) * 2 - 1 AS sawtooth_wave + , time -> abs(sawtooth_wave(time)) * 2 - 1 AS triangle_wave + + -- Helpers + , (from, to, wave, time) -> from + ((wave(time) + 1) / 2) * (to - from) AS lfo + , (from, to, steps, time) -> from + floor((time - floor(time)) * steps) / steps * (to - from) AS step_lfo + , (from, to, steps, time) -> exp(step_lfo(log(from), log(to), steps, time)) AS exp_step_lfo + + -- Noise + , time -> cityHash64(time) / 0xFFFFFFFFFFFFFFFF AS uniform_noise + , time -> erf(uniform_noise(time)) AS white_noise + , time -> cityHash64(time) % 2 ? 1 : -1 AS bernoulli_noise + + -- Distortion + , (x, amount) -> clamp(x * amount) AS clipping + , (x, amount) -> clamp(x > 0 ? pow(x, amount) : -pow(-x, amount)) AS power_distortion + , (x, amount) -> round(x * exp2(amount)) / exp2(amount) AS bitcrush + , (time, sample_frequency) -> round(time * sample_frequency) / sample_frequency AS desample + , (time, wave, amount) -> (time - floor(time) < (1 - amount)) ? wave(time * (1 - amount)) : 0 AS thin + , (time, wave, amount) -> wave(floor(time) + pow(time - floor(time), amount)) AS skew + + -- Combining + , (a, b, weight) -> a * (1 - weight) + b * weight AS combine + + -- Envelopes + , (time, offset, attack, hold, release) -> + time < offset ? 0 + : (time < offset + attack ? ((time - offset) / attack) + : (time < offset + attack + hold ? 1 + : (time < offset + attack + hold + release ? (offset + attack + hold + release - time) / release + : 0))) AS envelope + + , (bpm, time, offset, attack, hold, release) -> + envelope( + time * (bpm / 60) - floor(time * (bpm / 60)), + offset, + attack, + hold, + release) AS running_envelope + + -- Sequencers + , (sequence, time) -> sequence[1 + time::UInt64 % length(sequence)] AS sequencer + + -- Delay + , (time, wave, delay, decay, count) -> arraySum(n -> wave(time - delay * n) * pow(decay, n), range(count)) AS delay + + + , delay(time, (time -> power_distortion(sine_wave(time * 80 + sine_wave(time * 2)), lfo(0.5, 1, sine_wave, time / 16)) + * running_envelope(60, time, 0, 0.0, 0.01, 0.1)), + 0.2, 0.5, 5) AS kick + +SELECT + + (output( + kick + + delay(time, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + + + sine_wave(time * 100 + 1 * sine_wave(time * 10 + 1/4)) + * running_envelope(120, time, 0, 0.01, 0.01, 0.1) + ), + + output( + kick + + delay(time + 0.01, (time -> + power_distortion( + sine_wave(time * 50 + 1 * sine_wave(time * 100 + 1/4)) + * running_envelope(60, time, 0, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, time / 7) + + + delay(time - 0.01, (time -> + power_distortion( + sine_wave(time * sequencer([50, 100, 200, 400], time / 2) + 1 * sine_wave(time * sequencer([50, 100, 200], time / 4) + 1/4)) + * running_envelope(60, time, 0.5, 0.01, 0.01, 0.1), + lfo(1, 0.75, triangle_wave, time / 8))), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 16 + time / 11) + + + delay(time + 0.005, (time -> + white_noise(time) * running_envelope(60, time, 0.75, 0.01, 0.01, 0.1)), + 0.2, 0.5, 10) + * lfo(0.5, 1, triangle_wave, 24 + time / 13) + )) + +FROM system.numbers +LIMIT 10; diff --git a/tests/queries/0_stateless/02842_filesystem_cache_validate_path.reference b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql new file mode 100644 index 00000000000..c8fb776ac5f --- /dev/null +++ b/tests/queries/0_stateless/02842_filesystem_cache_validate_path.sql @@ -0,0 +1,45 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_1; +DROP TABLE IF EXISTS test_2; + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/var/lib/clickhouse/filesystem_caches/../kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '../kek', + disk = 'local_disk'); -- {serverError BAD_ARGUMENTS} + +CREATE TABLE test_1 (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = '/var/lib/clickhouse/filesystem_caches/kek', + disk = 'local_disk'); + +CREATE TABLE test_2 (a Int32) +ENGINE = MergeTree() +ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '1Mi', + path = 'kek', + disk = 'local_disk'); diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference new file mode 100644 index 00000000000..6e04d969e67 --- /dev/null +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference @@ -0,0 +1,20 @@ +SELECT count() +FROM t_02848_mt1 +PREWHERE notEmpty(v) AND (k = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (b = \'3\') AND (a = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (c > 0) AND (a = 3) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (c < 20) +1 +SELECT count() +FROM t_02848_mt2 +PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (a = 3) +1 diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql new file mode 100644 index 00000000000..bc9d7e5664e --- /dev/null +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql @@ -0,0 +1,34 @@ +SET optimize_move_to_prewhere = 1; +SET enable_multiple_prewhere_read_steps = 1; + +DROP TABLE IF EXISTS t_02848_mt1; +DROP TABLE IF EXISTS t_02848_mt2; + +CREATE TABLE t_02848_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; + +INSERT INTO t_02848_mt1 SELECT number, toString(number) FROM numbers(100); + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); + +CREATE TABLE t_02848_mt2 (a UInt32, b String, c Int32, d String) ENGINE = MergeTree ORDER BY (a,b,c) SETTINGS min_bytes_for_wide_part=0; + +INSERT INTO t_02848_mt2 SELECT number, toString(number), number, 'aaaabbbbccccddddtestxxxyyy' FROM numbers(100); + +-- the estimated column sizes are: {a: 428, b: 318, c: 428, d: 73} +-- it's not correct but let's fix it in the future. + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; + +EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; +SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; + +DROP TABLE t_02848_mt1; +DROP TABLE t_02848_mt2; diff --git a/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference new file mode 100644 index 00000000000..729d6cc94e6 --- /dev/null +++ b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.reference @@ -0,0 +1,10 @@ +10 4950 +UPDATE v = _CAST(4950, \'Nullable(UInt64)\') WHERE 1 +10 [0,1,2,3,4,5,6,7,8,9] +UPDATE v = _CAST([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], \'Array(UInt64)\') WHERE 1 +10 5 +UPDATE v = _CAST(\'\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\', \'AggregateFunction(uniqExact, UInt64)\') WHERE 1 +10 1 +UPDATE v = _CAST(timestamp, \'DateTime\') WHERE 1 +UPDATE v = (SELECT sum(number) FROM numbers(1000) WHERE number > randConstant()) WHERE 1 +20 2100-10-10 00:00:00 diff --git a/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql new file mode 100644 index 00000000000..b0e1b5c54fb --- /dev/null +++ b/tests/queries/0_stateless/02842_mutations_replace_non_deterministic.sql @@ -0,0 +1,134 @@ +DROP TABLE IF EXISTS t_mutations_nondeterministic SYNC; + +SET mutations_sync = 2; +SET mutations_execute_subqueries_on_initiator = 1; +SET mutations_execute_nondeterministic_on_initiator = 1; + +-- SELECT sum(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 20); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT sum(number) FROM numbers(100)) WHERE 1; + +SELECT id, v FROM t_mutations_nondeterministic ORDER BY id; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- SELECT groupArray(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v Array(UInt64)) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, [20]); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT groupArray(number) FROM numbers(10)) WHERE 1; + +SELECT id, v FROM t_mutations_nondeterministic ORDER BY id; + +-- Too big result. +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT groupArray(number) FROM numbers(10000)) WHERE 1; -- { serverError BAD_ARGUMENTS } + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- SELECT uniqExactState(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v AggregateFunction(uniqExact, UInt64)) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, initializeAggregation('uniqExactState', 1::UInt64)); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = (SELECT uniqExactState(number) FROM numbers(5)) WHERE 1; + +SELECT id, finalizeAggregation(v) FROM t_mutations_nondeterministic ORDER BY id; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- now() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v DateTime) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, '2020-10-10'); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = now() WHERE 1; + +SELECT id, v BETWEEN now() - INTERVAL 10 MINUTE AND now() FROM t_mutations_nondeterministic; + +SELECT + replaceRegexpOne(command, '(\\d{10})', 'timestamp'), +FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- filesystem(...) + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 10); + +ALTER TABLE t_mutations_nondeterministic UPDATE v = filesystemCapacity(materialize('default')) WHERE 1; -- { serverError BAD_ARGUMENTS } + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- UPDATE SELECT randConstant() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, 10); + +-- Check that function in subquery is not rewritten. +ALTER TABLE t_mutations_nondeterministic +UPDATE v = +( + SELECT sum(number) FROM numbers(1000) WHERE number > randConstant() +) WHERE 1 +SETTINGS mutations_execute_subqueries_on_initiator = 0, allow_nondeterministic_mutations = 1; + +SELECT command FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND is_done +ORDER BY command; + +DROP TABLE t_mutations_nondeterministic SYNC; + +-- DELETE WHERE now() + +CREATE TABLE t_mutations_nondeterministic (id UInt64, d DateTime) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02842_mutations_replace', '1') +ORDER BY id; + +INSERT INTO t_mutations_nondeterministic VALUES (10, '2000-10-10'), (20, '2100-10-10'); + +ALTER TABLE t_mutations_nondeterministic DELETE WHERE d < now(); + +SELECT + replaceRegexpOne(command, '(\\d{10})', 'timestamp'), +FROM system.mutations +WHERE database = currentDatabase() AND table = 't_mutations_nondeterministic' AND NOT is_done +ORDER BY command; + +SELECT id, d FROM t_mutations_nondeterministic ORDER BY id; + +DROP TABLE t_mutations_nondeterministic SYNC; diff --git a/tests/queries/0_stateless/02842_one_input_format.reference b/tests/queries/0_stateless/02842_one_input_format.reference new file mode 100644 index 00000000000..714df685535 --- /dev/null +++ b/tests/queries/0_stateless/02842_one_input_format.reference @@ -0,0 +1,12 @@ +dummy UInt8 +0 +0 +0 +data.csv +data.jsonl +data.native +0 +0 +0 +1 +1 diff --git a/tests/queries/0_stateless/02842_one_input_format.sh b/tests/queries/0_stateless/02842_one_input_format.sh new file mode 100755 index 00000000000..f2199cbe2ce --- /dev/null +++ b/tests/queries/0_stateless/02842_one_input_format.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +FILE_DIR=$CLICKHOUSE_TEST_UNIQUE_NAME +mkdir -p $FILE_DIR + +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format Native" > $FILE_DIR/data.native +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format CSV" > $FILE_DIR/data.csv +$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format JSONEachRow" > $FILE_DIR/data.jsonl + +$CLICKHOUSE_LOCAL -q "desc file('$FILE_DIR/*', One)" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One)" +$CLICKHOUSE_LOCAL -q "select _file from file('$FILE_DIR/*', One) order by _file" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8')" +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt64')" 2>&1 | grep "BAD_ARGUMENTS" -c +$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8, y UInt8')" 2>&1 | grep "BAD_ARGUMENTS" -c + +rm -rf $FILE_DIR + diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference new file mode 100644 index 00000000000..0025187be30 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.reference @@ -0,0 +1,4 @@ +There is no handle /sashboards. Maybe you meant /dashboard +There is no handle /sashboard. Maybe you meant /dashboard +There is no handle /sashboarb. Maybe you meant /dashboard +There is no handle /sashboaxb. Maybe you meant /dashboard diff --git a/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh new file mode 100755 index 00000000000..cf69c742777 --- /dev/null +++ b/tests/queries/0_stateless/02842_suggest_http_page_in_error_message.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboards" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboard" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboarb" | grep -o ".* Maybe you meant /dashboard" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}sashboaxb" | grep -o ".* Maybe you meant /dashboard" diff --git a/tests/queries/0_stateless/02842_truncate_database.reference b/tests/queries/0_stateless/02842_truncate_database.reference new file mode 100644 index 00000000000..71f52bcd1da --- /dev/null +++ b/tests/queries/0_stateless/02842_truncate_database.reference @@ -0,0 +1,22 @@ +0 +0 +0 +0 +0 +dest_dictionary test_truncate_database 0 +1 First +=== TABLES IN test_truncate_database === +dest_dictionary +dest_view_log +dest_view_memory +dest_view_merge_tree +dest_view_stripe_log +dest_view_tiny_log +source_table_dictionary +source_table_log +source_table_memory +source_table_merge_tree +source_table_stripe_log +source_table_tiny_log +=== DICTIONARIES IN test_truncate_database === +dest_dictionary diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql new file mode 100644 index 00000000000..a767acba14c --- /dev/null +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -0,0 +1,76 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS test_truncate_database; + +-- test TRUNCATE DATABASE operation. +-- create tables, views and dictionary and populate them. Then try truncating the database. +-- all tables, views and dictionaries should be removed leaving an empty database +CREATE DATABASE test_truncate_database; +USE test_truncate_database; + +-- create tables with several different types of table engines +CREATE TABLE source_table_memory (x UInt16) ENGINE = Memory; +CREATE TABLE source_table_log (x UInt16) ENGINE = Log; +CREATE TABLE source_table_tiny_log (x UInt16) ENGINE = TinyLog; +CREATE TABLE source_table_stripe_log (x UInt16) ENGINE = StripeLog; +CREATE TABLE source_table_merge_tree (x UInt16) ENGINE = MergeTree ORDER BY x PARTITION BY x; +-- create dictionary source table +CREATE TABLE source_table_dictionary +( + id UInt64, + value String +) ENGINE = Memory(); + +-- insert data into the tables +INSERT INTO source_table_memory SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_tiny_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_stripe_log SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_merge_tree SELECT * FROM system.numbers LIMIT 10; +INSERT INTO source_table_dictionary VALUES (1, 'First'); + + +-- create view based on the tables +CREATE VIEW dest_view_memory (x UInt64) AS SELECT * FROM source_table_memory; +CREATE VIEW dest_view_log (x UInt64) AS SELECT * FROM source_table_log; +CREATE VIEW dest_view_tiny_log (x UInt64) AS SELECT * FROM source_table_tiny_log; +CREATE VIEW dest_view_stripe_log (x UInt64) AS SELECT * FROM source_table_stripe_log; +CREATE VIEW dest_view_merge_tree (x UInt64) AS SELECT * FROM source_table_merge_tree; +-- create dictionary based on source table +CREATE DICTIONARY dest_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'test_truncate_database' TABLE 'source_table_dictionary')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + + +SELECT * FROM dest_view_memory ORDER BY x LIMIT 1; +SELECT * FROM dest_view_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_tiny_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_stripe_log ORDER BY x LIMIT 1; +SELECT * FROM dest_view_merge_tree ORDER BY x LIMIT 1; +SELECT name, database, element_count FROM system.dictionaries WHERE database = 'test_truncate_database' AND name = 'dest_dictionary'; +SELECT * FROM dest_dictionary; +SELECT '=== TABLES IN test_truncate_database ==='; +SHOW TABLES FROM test_truncate_database; +SELECT '=== DICTIONARIES IN test_truncate_database ==='; +SHOW DICTIONARIES FROM test_truncate_database; + +TRUNCATE DATABASE test_truncate_database; + +SELECT * FROM dest_view_set ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_memory ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_tiny_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_stripe_log ORDER BY x LIMIT 1; -- {serverError 60} +SELECT * FROM dest_view_merge_tree ORDER BY x LIMIT 1; -- {serverError 60} +SELECT name, database, element_count FROM system.dictionaries WHERE database = 'test_truncate_database' AND name = 'dest_dictionary'; +SELECT * FROM dest_dictionary; -- {serverError 60} +SHOW TABLES FROM test_truncate_database; +SHOW DICTIONARIES FROM test_truncate_database; + +DROP DATABASE test_truncate_database; diff --git a/tests/queries/0_stateless/02843_context_has_expired.reference b/tests/queries/0_stateless/02843_context_has_expired.reference new file mode 100644 index 00000000000..229972f2924 --- /dev/null +++ b/tests/queries/0_stateless/02843_context_has_expired.reference @@ -0,0 +1,5 @@ +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02843_context_has_expired.sql b/tests/queries/0_stateless/02843_context_has_expired.sql new file mode 100644 index 00000000000..8355ce2c18c --- /dev/null +++ b/tests/queries/0_stateless/02843_context_has_expired.sql @@ -0,0 +1,36 @@ +DROP DICTIONARY IF EXISTS 02843_dict; +DROP TABLE IF EXISTS 02843_source; +DROP TABLE IF EXISTS 02843_join; + +CREATE TABLE 02843_source +( + id UInt64, + value String +) +ENGINE=Memory; + +CREATE DICTIONARY 02843_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE '02843_source')) +LAYOUT(DIRECT()); + +SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1'))); + +CREATE TABLE 02843_join (id UInt8, value String) ENGINE Join(ANY, LEFT, id); +SELECT 1 IN (SELECT joinGet(02843_join, 'value', materialize(1))); +SELECT 1 IN (SELECT joinGetOrNull(02843_join, 'value', materialize(1))); + +SELECT 1 IN (SELECT materialize(connectionId())); +SELECT 1000000 IN (SELECT materialize(getSetting('max_threads'))); +SELECT 1 in (SELECT file(materialize('a'))); -- { serverError 107 } + +EXPLAIN ESTIMATE SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1'))); +EXPLAIN ESTIMATE SELECT 1 IN (SELECT joinGet(`02843_join`, 'value', materialize(1))); + +DROP DICTIONARY 02843_dict; +DROP TABLE 02843_source; +DROP TABLE 02843_join; diff --git a/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference new file mode 100644 index 00000000000..d641328e9a5 --- /dev/null +++ b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.reference @@ -0,0 +1,2 @@ +202308 1 +202308 2 diff --git a/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql new file mode 100644 index 00000000000..6e26a5166cb --- /dev/null +++ b/tests/queries/0_stateless/02843_date_predicate_optimizations_bugs.sql @@ -0,0 +1,9 @@ +select + toYYYYMM(date) as date_, + n +from (select + [toDate('20230815'), toDate('20230816')] as date, + [1, 2] as n +) as data +array join date, n +where date_ >= 202303; diff --git a/tests/queries/0_stateless/02844_distributed_virtual_columns.reference b/tests/queries/0_stateless/02844_distributed_virtual_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02844_distributed_virtual_columns.sql b/tests/queries/0_stateless/02844_distributed_virtual_columns.sql new file mode 100644 index 00000000000..31a6780f19e --- /dev/null +++ b/tests/queries/0_stateless/02844_distributed_virtual_columns.sql @@ -0,0 +1,5 @@ +drop table if exists data_01072; +drop table if exists dist_01072; +create table data_01072 (key Int) Engine=MergeTree() ORDER BY key; +create table dist_01072 (key Int) Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01072, key); +select * from dist_01072 where key=0 and _part='0'; diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.reference b/tests/queries/0_stateless/02844_subquery_timeout_with_break.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql new file mode 100644 index 00000000000..511ed0c59de --- /dev/null +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filter GRANULARITY 1) ENGINE=MergeTree() ORDER BY key; + +INSERT INTO t SELECT number, rand()%1000 FROM numbers(10000); + +SET timeout_overflow_mode='break'; +SET max_execution_time=0.1; +SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000)); + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02845_join_on_cond_sparse.reference b/tests/queries/0_stateless/02845_join_on_cond_sparse.reference new file mode 100644 index 00000000000..9e4a1331412 --- /dev/null +++ b/tests/queries/0_stateless/02845_join_on_cond_sparse.reference @@ -0,0 +1 @@ +0 1 0 0 diff --git a/tests/queries/0_stateless/02845_join_on_cond_sparse.sql b/tests/queries/0_stateless/02845_join_on_cond_sparse.sql new file mode 100644 index 00000000000..b70419af057 --- /dev/null +++ b/tests/queries/0_stateless/02845_join_on_cond_sparse.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( id UInt32, attr UInt32 ) ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; + +INSERT INTO t1 VALUES (0, 0); + +CREATE TABLE t2 ( id UInt32, attr UInt32 ) ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; + +INSERT INTO t2 VALUES (0, 0); + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND t1.attr != 0; + +INSERT INTO t1 VALUES (0, 1); + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND t1.attr != 0; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02845_parquet_odd_decimals.reference b/tests/queries/0_stateless/02845_parquet_odd_decimals.reference new file mode 100644 index 00000000000..29d6383b52c --- /dev/null +++ b/tests/queries/0_stateless/02845_parquet_odd_decimals.reference @@ -0,0 +1 @@ +100 diff --git a/tests/queries/0_stateless/02845_parquet_odd_decimals.sh b/tests/queries/0_stateless/02845_parquet_odd_decimals.sh new file mode 100755 index 00000000000..f1e2ec849c4 --- /dev/null +++ b/tests/queries/0_stateless/02845_parquet_odd_decimals.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# 9-byte decimals produced by spark in integration test test_storage_delta/test.py::test_single_log_file + +${CLICKHOUSE_CLIENT} --query="drop table if exists 02845_parquet_odd_decimals" +${CLICKHOUSE_CLIENT} --query="create table 02845_parquet_odd_decimals (\`col-1de12c05-5dd5-4fa7-9f93-33c43c9a4028\` Decimal(20, 0), \`col-5e1b97f1-dade-4c7d-b71b-e31d789e01a4\` String) engine Memory" +${CLICKHOUSE_CLIENT} --query="insert into 02845_parquet_odd_decimals from infile '$CUR_DIR/data_parquet/nine_byte_decimals_from_spark.parquet'" +${CLICKHOUSE_CLIENT} --query="select count() from 02845_parquet_odd_decimals" diff --git a/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference b/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference new file mode 100644 index 00000000000..8e5815188e7 --- /dev/null +++ b/tests/queries/0_stateless/02860_distributed_flush_on_detach.reference @@ -0,0 +1,27 @@ +-- { echoOn } + +create table data (key Int) engine=Memory(); +create table dist (key Int) engine=Distributed(default, currentDatabase(), data); +system stop distributed sends dist; +-- check that FLUSH DISTRIBUTED does flushing anyway +insert into dist values (1); +select * from data; +system flush distributed dist; +select * from data; +1 +truncate table data; +-- check that flush_on_detach=1 by default +insert into dist values (1); +detach table dist; +select * from data; +1 +attach table dist; +truncate table data; +-- check flush_on_detach=0 +drop table dist; +create table dist (key Int) engine=Distributed(default, currentDatabase(), data) settings flush_on_detach=0; +system stop distributed sends dist; +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; diff --git a/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql b/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql new file mode 100644 index 00000000000..5ba45d72c08 --- /dev/null +++ b/tests/queries/0_stateless/02860_distributed_flush_on_detach.sql @@ -0,0 +1,33 @@ +set prefer_localhost_replica=0; + +drop table if exists data; +drop table if exists dist; + +-- { echoOn } + +create table data (key Int) engine=Memory(); +create table dist (key Int) engine=Distributed(default, currentDatabase(), data); +system stop distributed sends dist; + +-- check that FLUSH DISTRIBUTED does flushing anyway +insert into dist values (1); +select * from data; +system flush distributed dist; +select * from data; +truncate table data; + +-- check that flush_on_detach=1 by default +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; +truncate table data; + +-- check flush_on_detach=0 +drop table dist; +create table dist (key Int) engine=Distributed(default, currentDatabase(), data) settings flush_on_detach=0; +system stop distributed sends dist; +insert into dist values (1); +detach table dist; +select * from data; +attach table dist; diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.reference b/tests/queries/0_stateless/02861_index_set_incorrect_args.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql new file mode 100644 index 00000000000..fa51f5c9abc --- /dev/null +++ b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/52019 +DROP TABLE IF EXISTS set_index__fuzz_41; +CREATE TABLE set_index__fuzz_41 (`a` Date, `b` Nullable(DateTime64(3)), INDEX b_set b TYPE set(0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO set_index__fuzz_41 (a) VALUES (today()); +SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2, use_hedged_requests=0; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +DROP TABLE set_index__fuzz_41; diff --git a/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference b/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference new file mode 100644 index 00000000000..0e207969f7c --- /dev/null +++ b/tests/queries/0_stateless/02861_interpolate_alias_precedence.reference @@ -0,0 +1,8 @@ +2023-05-15 1 +2023-05-16 1 +2023-05-17 1 +2023-05-18 1 +2023-05-19 1 +2023-05-20 1 +2023-05-21 1 +2023-05-22 15 diff --git a/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql b/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql new file mode 100644 index 00000000000..dc96b9c57c7 --- /dev/null +++ b/tests/queries/0_stateless/02861_interpolate_alias_precedence.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS 02861_interpolate; + +CREATE TABLE 02861_interpolate (date Date, id String, f Int16) ENGINE=MergeTree() ORDER BY (date); +INSERT INTO 02861_interpolate VALUES ('2023-05-15', '1', 1), ('2023-05-22', '1', 15); + +SELECT date AS d, toNullable(f) AS f FROM 02861_interpolate WHERE id = '1' ORDER BY d ASC WITH FILL STEP toIntervalDay(1) INTERPOLATE (f); + +DROP TABLE 02861_interpolate; diff --git a/tests/queries/0_stateless/02861_uuid_format_serialization.reference b/tests/queries/0_stateless/02861_uuid_format_serialization.reference new file mode 100644 index 00000000000..4c6b4cd21e8 Binary files /dev/null and b/tests/queries/0_stateless/02861_uuid_format_serialization.reference differ diff --git a/tests/queries/0_stateless/02861_uuid_format_serialization.sql b/tests/queries/0_stateless/02861_uuid_format_serialization.sql new file mode 100644 index 00000000000..e73ef2d5197 --- /dev/null +++ b/tests/queries/0_stateless/02861_uuid_format_serialization.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS t_uuid; +CREATE TABLE t_uuid (x UUID) ENGINE=MergeTree ORDER BY x; + +INSERT INTO t_uuid VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0'), ('992f6910-42b2-43cd-98bc-c812fbf9b683'), ('417ddc5d-e556-4d27-95dd-a34d84e46a50'); + +SELECT * FROM t_uuid ORDER BY x LIMIT 1 FORMAT RowBinary; +SELECT * FROM t_uuid ORDER BY x FORMAT RowBinary; + +DROP TABLE IF EXISTS t_uuid; diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql new file mode 100644 index 00000000000..0678023f2f4 --- /dev/null +++ b/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/52019 +DROP TABLE IF EXISTS tab; +SET allow_experimental_inverted_index=1; +CREATE TABLE tab (`k` UInt64, `s` Map(String, String), INDEX af mapKeys(s) TYPE inverted(2) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; +INSERT INTO tab (k) VALUES (0); +SELECT * FROM tab PREWHERE (s[NULL]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM tab PREWHERE (s[1]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT * FROM tab PREWHERE (s['foo']) = 'Click a03' SETTINGS allow_experimental_analyzer=1; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference new file mode 100644 index 00000000000..a874ad9ebc5 --- /dev/null +++ b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.reference @@ -0,0 +1,5 @@ +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +403229640000000000 6.034192082918747e163 +-25 4583 1555239399 7057356139103719911 -148231516101255056243829344033567469081 192050850819683407219545263398200742375 +231 4583 1555239399 7057356139103719911 192050850819683407219545263398200742375 192050850819683407219545263398200742375 +00000000-5cb3-11e7-0000-000000000000 diff --git a/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql new file mode 100644 index 00000000000..d6369835f04 --- /dev/null +++ b/tests/queries/0_stateless/02862_uuid_reinterpret_as_numeric.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_uuid; +CREATE TABLE t_uuid (x UUID) ENGINE=MergeTree ORDER BY x; + +INSERT INTO t_uuid VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0'); + +SELECT reinterpretAsUUID(x) FROM t_uuid; +SELECT reinterpretAsFloat32(x), reinterpretAsFloat64(x) FROM t_uuid; +SELECT reinterpretAsInt8(x), reinterpretAsInt16(x), reinterpretAsInt32(x), reinterpretAsInt64(x), reinterpretAsInt128(x), reinterpretAsInt256(x) FROM t_uuid; +SELECT reinterpretAsUInt8(x), reinterpretAsUInt16(x), reinterpretAsUInt32(x), reinterpretAsUInt64(x), reinterpretAsUInt128(x), reinterpretAsUInt256(x) FROM t_uuid; + +SELECT reinterpretAsUUID(reinterpretAsUInt128(reinterpretAsUInt32(reinterpretAsUInt256(x)))) FROM t_uuid; + +DROP TABLE IF EXISTS t_uuid; diff --git a/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference new file mode 100644 index 00000000000..4657a3024bf --- /dev/null +++ b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.reference @@ -0,0 +1,7 @@ +3 + +3 + +3 +3 +1 diff --git a/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql new file mode 100644 index 00000000000..db2c3d92f47 --- /dev/null +++ b/tests/queries/0_stateless/02863_delayed_source_with_totals_and_extremes.sql @@ -0,0 +1,16 @@ +-- Tags: no-parallel +-- Tag no-parallel: failpoint is used which can force DelayedSource on other tests + +DROP TABLE IF EXISTS 02863_delayed_source; + +CREATE TABLE 02863_delayed_source(a Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02863_delayed_source/{replica}', 'r1') ORDER BY a; +INSERT INTO 02863_delayed_source VALUES (1), (2); + +SYSTEM ENABLE FAILPOINT use_delayed_remote_source; + +SELECT sum(a) FROM remote('127.0.0.4', currentDatabase(), '02863_delayed_source') WITH TOTALS SETTINGS extremes = 1; +SELECT max(explain like '%Delayed%') FROM (EXPLAIN PIPELINE graph=1 SELECT sum(a) FROM remote('127.0.0.4', currentDatabase(), '02863_delayed_source') WITH TOTALS SETTINGS extremes = 1); + +SYSTEM DISABLE FAILPOINT use_delayed_remote_source; + +DROP TABLE 02863_delayed_source; \ No newline at end of file diff --git a/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.reference b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql new file mode 100644 index 00000000000..4c30795758e --- /dev/null +++ b/tests/queries/0_stateless/02863_mutation_where_in_set_result_cache_pipeline_stuck_bug.sql @@ -0,0 +1,10 @@ +drop table if exists tab; +create table tab (x UInt32, y UInt32) engine = MergeTree order by x; + +insert into tab select number, number from numbers(10); +insert into tab select number, number from numbers(20); + +set mutations_sync=2; + +alter table tab delete where x > 1000 and y in (select sum(number + 1) from numbers_mt(1e7) group by number % 2 with totals); +drop table if exists tab; diff --git a/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet b/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet new file mode 100644 index 00000000000..43fcd94e606 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/nine_byte_decimals_from_spark.parquet differ diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 1805b56f8dc..e0e11990d83 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -37,7 +37,7 @@ function try_sync_replicas() pids[${i}]=$! i=$((i + 1)) done - for pid in ${pids[*]}; do + for pid in "${pids[@]}"; do wait $pid || (echo "Failed to sync some replicas" && exit 1) done echo "Replication did not hang: synced all replicas of $table_name_prefix" @@ -114,3 +114,5 @@ function check_replication_consistency() fi } + +# vi: ft=bash diff --git a/tests/queries/1_stateful/00061_storage_buffer.sql b/tests/queries/1_stateful/00061_storage_buffer.sql index e1f67abda20..e3cda3de36d 100644 --- a/tests/queries/1_stateful/00061_storage_buffer.sql +++ b/tests/queries/1_stateful/00061_storage_buffer.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test.hits_dst; DROP TABLE IF EXISTS test.hits_buffer; CREATE TABLE test.hits_dst AS test.hits; -CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 1, 10, 10000, 100000, 10000000, 100000000); +CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 600, 600, 1000000, 1000000, 100000000, 1000000000); INSERT INTO test.hits_buffer SELECT * FROM test.hits WHERE CounterID = 800784; SELECT count() FROM test.hits_buffer; diff --git a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql index d652b1bc559..424e6c2dfee 100644 --- a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql +++ b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql @@ -15,8 +15,8 @@ SELECT count() FROM test.hits WHERE EventDate IN (toDate('2014-03-18'), toDate(' SELECT count() FROM test.hits WHERE EventDate = concat('2014-0', '3-18'); DROP TABLE IF EXISTS test.hits_indexed_by_time; -CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); -INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits; +CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits SETTINGS max_block_size = 65000; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = '2014-03-18 01:02:03'; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < '2014-03-18 01:02:03'; diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql index 2178644214a..d2cbcea8aaa 100644 --- a/tests/queries/1_stateful/00162_mmap_compression_none.sql +++ b/tests/queries/1_stateful/00162_mmap_compression_none.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS hits_none; -CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO hits_none SELECT Title FROM test.hits; SET min_bytes_to_use_mmap_io = 1; diff --git a/tests/queries/1_stateful/00174_distinct_in_order.sql b/tests/queries/1_stateful/00174_distinct_in_order.sql index aac54d46181..301ff36dd42 100644 --- a/tests/queries/1_stateful/00174_distinct_in_order.sql +++ b/tests/queries/1_stateful/00174_distinct_in_order.sql @@ -4,9 +4,9 @@ drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- DISTINCT columns are the same as in ORDER BY'; -create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=1; -create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=0; select distinct * from distinct_in_order except select * from ordinary_distinct; @@ -14,9 +14,9 @@ drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- DISTINCT columns has prefix in ORDER BY columns'; -create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=1; -create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); +create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=0; select distinct * from distinct_in_order except select * from ordinary_distinct; diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index ef70c82aefc..12bc0002191 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -155,3 +155,23 @@ function random_str() local n=$1 && shift tr -cd '[:lower:]' < /dev/urandom | head -c"$n" } + +function query_with_retry +{ + local query="$1" && shift + + local retry=0 + until [ $retry -ge 5 ] + do + local result + result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" + if [ "$?" == 0 ]; then + echo -n "$result" + return + else + retry=$((retry + 1)) + sleep 3 + fi + done + echo "Query '$query' failed with '$result'" +} diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 0033c29c41c..a49e8f5c62f 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,7 +62,8 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1", + Timeout="300", + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1&create_index_ignore_unique=1", ) ) diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py index 1cf4c19c649..5f4baf8e59b 100755 --- a/tests/sqllogic/runner.py +++ b/tests/sqllogic/runner.py @@ -186,10 +186,10 @@ def mode_check_statements(parser): out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") - complete_sqlite_dir = os.path.join(out_stages_dir, "complete-sqlite") + complete_sqlite_dir = os.path.join(out_stages_dir, "statements-sqlite") os.makedirs(complete_sqlite_dir, exist_ok=True) - reports["complete-sqlite"] = run_all_tests_in_parallel( + reports["statements-sqlite"] = run_all_tests_in_parallel( setup_kwargs=as_kwargs( engine=Engines.SQLITE, ), @@ -224,6 +224,64 @@ def mode_check_statements(parser): parser.set_defaults(func=calle) +def mode_check_complete(parser): + parser.add_argument("--input-dir", metavar="DIR", required=True) + parser.add_argument("--out-dir", metavar="DIR", required=True) + + def calle(args): + input_dir = os.path.realpath(args.input_dir) + out_dir = os.path.realpath(args.out_dir) + + if not os.path.exists(input_dir): + raise FileNotFoundError( + input_dir, f"check statements: no such file or directory {input_dir}" + ) + + if not os.path.isdir(input_dir): + raise NotADirectoryError( + input_dir, f"check statements:: not a dir {input_dir}" + ) + + reports = dict() + + out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") + + complete_sqlite_dir = os.path.join(out_stages_dir, "complete-sqlite") + os.makedirs(complete_sqlite_dir, exist_ok=True) + + reports["complete-sqlite"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.SQLITE, + ), + runner_kwargs=as_kwargs( + verify_mode=False, + stop_at_statement_error=True, + ), + input_dir=input_dir, + output_dir=complete_sqlite_dir, + ) + + verify_clickhouse_dir = os.path.join(out_stages_dir, "complete-clickhouse") + os.makedirs(verify_clickhouse_dir, exist_ok=True) + + reports["complete-clickhouse"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.ODBC, + conn_str=default_clickhouse_odbc_conn_str(), + ), + runner_kwargs=as_kwargs( + verify_mode=True, + stop_at_statement_error=True, + ), + input_dir=complete_sqlite_dir, + output_dir=verify_clickhouse_dir, + ) + + statements_report(reports, out_dir, args.mode) + + parser.set_defaults(func=calle) + + def make_actual_report(reports): return {stage: report.get_map() for stage, report in reports.items()} @@ -399,16 +457,22 @@ def parse_args(): ) subparsers = parser.add_subparsers(dest="mode") + mode_check_complete( + subparsers.add_parser( + "complete-test", + help="Run all tests. Check that all statements and queries are passed", + ) + ) mode_check_statements( subparsers.add_parser( "statements-test", - help="Run all test. Check that all statements are passed", + help="Run all tests. Check that all statements are passed", ) ) mode_self_test( subparsers.add_parser( "self-test", - help="Run all test. Check that all statements are passed", + help="Run all tests. Check that all statements are passed", ) ) args = parser.parse_args() diff --git a/tests/sqllogic/self-test/canonic_report.json b/tests/sqllogic/self-test/canonic_report.json index 0cd1aa4b43b..09adc0e1c1d 100644 --- a/tests/sqllogic/self-test/canonic_report.json +++ b/tests/sqllogic/self-test/canonic_report.json @@ -1 +1 @@ -{"sqlite-complete": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: no such column: c"}, "38": {"status": "success", "position": 38, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "126": {"status": "success", "position": 126, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "sqlite-vs-sqlite": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-sqlite", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "42": {"status": "success", "position": 42, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "135": {"status": "success", "position": 135, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "clickhouse-complete": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "32": {"status": "success", "position": 32, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "110": {"status": "success", "position": 110, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "139": {"status": "success", "position": 139, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "clickhouse-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/clickhouse-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "sqlite-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "error", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "canonic and actual results have different exceptions, details: canonic: query execution failed with an exception, original is: no such column: c, actual: query execution failed with an exception, original is: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "error", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "error", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}} +{"sqlite-complete": {"dbms_name": "sqlite", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 14, "fail": 5}, "total": {"success": 20, "fail": 5}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 14, "fail": 5}, "total": {"success": 20, "fail": 5}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: no such column: c"}, "38": {"status": "success", "position": 38, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "126": {"status": "success", "position": 126, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}, "145": {"status": "success", "position": 145, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "149": {"status": "success", "position": 149, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}, "152": {"status": "error", "position": 152, "request_type": "query", "request": "SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL", "reason": "Got non-integer result 'uxbns' for I type."}}}}}, "sqlite-vs-sqlite": {"dbms_name": "sqlite", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-sqlite", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "42": {"status": "success", "position": 42, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "135": {"status": "success", "position": 135, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}, "161": {"status": "error", "position": 161, "request_type": "query", "request": "SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL", "reason": "Got non-integer result 'uxbns' for I type."}}}}}, "clickhouse-complete": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 21, "fail": 4}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "32": {"status": "success", "position": 32, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "110": {"status": "success", "position": 110, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "139": {"status": "success", "position": 139, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "145": {"status": "success", "position": 145, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "149": {"status": "success", "position": 149, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}, "clickhouse-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 22, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/clickhouse-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 22, "fail": 3}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}, "sqlite-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 19, "fail": 6}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 6, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 19, "fail": 6}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "error", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "canonic and actual results have different exceptions, details: canonic: query execution failed with an exception, original is: no such column: c, actual: query execution failed with an exception, original is: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\\nReceived error:\\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.7.1.1)\\n\\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "error", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "error", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}, "154": {"status": "success", "position": 154, "request_type": "statement", "request": "CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)", "reason": "success"}, "158": {"status": "success", "position": 158, "request_type": "statement", "request": "INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl')", "reason": "success"}}}}}} \ No newline at end of file diff --git a/tests/sqllogic/self-test/test.test b/tests/sqllogic/self-test/test.test index 85b27ed7d60..503153acef8 100644 --- a/tests/sqllogic/self-test/test.test +++ b/tests/sqllogic/self-test/test.test @@ -142,4 +142,13 @@ SELECT number+1 from system.numbers LIMIT 20 ---- 20 values hashing to 52c46dff81346ead02fcf6245c762b1a +# Debug how incorrect result type parses +statement ok +CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT) +statement ok +INSERT INTO tab0 VALUES(0,535,860.48,'uxbns',253,640.58,'jvqkl') + +skipif ClickHouse +query I rowsort label-20 +SELECT + col2 AS col5 FROM tab0 WHERE NOT ( col0 ) * - - col4 IS NULL diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py index 42adb83809f..f6ad955e7b0 100755 --- a/tests/sqllogic/test_parser.py +++ b/tests/sqllogic/test_parser.py @@ -9,7 +9,13 @@ from enum import Enum from hashlib import md5 from functools import reduce -from exceptions import Error, ProgramError, ErrorWithParent, DataResultDiffer +from exceptions import ( + Error, + ProgramError, + ErrorWithParent, + DataResultDiffer, + QueryExecutionError, +) logger = logging.getLogger("parser") @@ -480,6 +486,7 @@ class QueryResult: for row in rows: res_row = [] for c, t in zip(row, types): + logger.debug(f"Builging row. c:{c} t:{t}") if c is None: res_row.append("NULL") continue @@ -490,7 +497,12 @@ class QueryResult: else: res_row.append(str(c)) elif t == "I": - res_row.append(str(int(c))) + try: + res_row.append(str(int(c))) + except ValueError as ex: + raise QueryExecutionError( + f"Got non-integer result '{c}' for I type." + ) elif t == "R": res_row.append(f"{c:.3f}") diff --git a/tests/sqllogic/test_runner.py b/tests/sqllogic/test_runner.py index 3df38e7fce5..f9ed23566b4 100644 --- a/tests/sqllogic/test_runner.py +++ b/tests/sqllogic/test_runner.py @@ -361,7 +361,7 @@ class TestRunner: continue if block.get_block_type() == test_parser.BlockType.control: - clogger.debug("Skip control block", name_pos) + clogger.debug("Skip control block %s", name_pos) block.dump_to(out_stream) continue @@ -374,13 +374,14 @@ class TestRunner: continue request = block.get_request() - exec_res = execute_request(request, self.connection) if block.get_block_type() in self.skip_request_types: clogger.debug("Runtime skip block for %s", self.dbms_name) block.dump_to(out_stream) continue + exec_res = execute_request(request, self.connection) + if block.get_block_type() == test_parser.BlockType.statement: try: clogger.debug("this is statement") diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 47dd2fc9f2d..4e1184cc9a5 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -20,7 +20,6 @@ if (ENABLE_UTILS) add_subdirectory (zookeeper-cli) add_subdirectory (zookeeper-dump-tree) add_subdirectory (zookeeper-remove-by-list) - add_subdirectory (wikistat-loader) add_subdirectory (check-marks) add_subdirectory (checksum-for-compressed-block) add_subdirectory (check-mysql-binlog) diff --git a/utils/backup/print_backup_info.py b/utils/backup/print_backup_info.py new file mode 100755 index 00000000000..54e5c745a8c --- /dev/null +++ b/utils/backup/print_backup_info.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +""" +print_backup_info: Extract information about a backup from ".backup" file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Usage: print_backup_info +""" +import sys +import os +import xml.etree.ElementTree as ET + + +def main(): + if len(sys.argv) != 2: + print(__doc__) + sys.exit(1) + backup_xml = sys.argv[1] + + if not os.path.isfile(backup_xml): + print("error: {} does not exist".format(backup_xml)) + sys.exit(1) + + # Process the file line-by-line + tree = ET.parse(backup_xml) + root = tree.getroot() + contents = root.find("contents") + + version_node = root.find("version") + version = int(version_node.text) if (version_node != None) else None + + timestamp_node = root.find("timestamp") + timestamp = timestamp_node.text if (timestamp_node != None) else None + + base_backup_node = root.find("base_backup") + base_backup = base_backup_node.text if (base_backup_node != None) else None + + number_of_files = 0 + size_of_files = 0 + number_of_files_from_base_backup = 0 + size_of_files_from_base_backup = 0 + databases = set() + tables = {} + + for file in contents: + name = file.find("name").text + size = int(file.find("size").text) + + use_base_node = file.find("use_base") + use_base = (use_base_node.text == "true") if (use_base_node != None) else False + + if use_base: + base_size_node = file.find("base_size") + base_size = int(base_size_node.text) if (base_size_node != None) else size + else: + base_size = 0 + + data_file_node = file.find("data_file") + data_file = data_file_node.text if (data_file_node != None) else name + + has_data_file = name == data_file + + if has_data_file: + if size > base_size: + number_of_files += 1 + size_of_files += size - base_size + if base_size > 0: + number_of_files_from_base_backup += 1 + size_of_files_from_base_backup += base_size + + table_name = extract_table_name_from_path(name) + if table_name: + if table_name not in tables: + tables[table_name] = [0, 0, 0, 0] + if not name.endswith(".sql") and has_data_file: + table_info = tables[table_name] + if size > base_size: + table_info[0] += 1 + table_info[1] += size - base_size + if base_size > 0: + table_info[2] += 1 + table_info[3] += base_size + tables[table_name] = table_info + + database_name = extract_database_name_from_path(name) + if database_name: + databases.add(database_name) + + size_of_backup = size_of_files + os.path.getsize(backup_xml) + + print(f"version={version}") + print(f"timestamp={timestamp}") + print(f"base_backup={base_backup}") + print(f"size_of_backup={size_of_backup}") + print(f"number_of_files={number_of_files}") + print(f"size_of_files={size_of_files}") + print(f"number_of_files_from_base_backup={number_of_files_from_base_backup}") + print(f"size_of_files_from_base_backup={size_of_files_from_base_backup}") + print(f"number_of_databases={len(databases)}") + print(f"number_of_tables={len(tables)}") + + print() + + print(f"{len(databases)} database(s):") + for database_name in sorted(databases): + print(database_name) + + print() + + print(f"{len(tables)} table(s):") + table_info_format = "{:>70} | {:>20} | {:>20} | {:>26} | {:>30}" + table_info_separator_line = ( + "{:->70}-+-{:->20}-+-{:->20}-+-{:->26}-+-{:->30}".format("", "", "", "", "") + ) + table_info_title_line = table_info_format.format( + "table name", + "num_files", + "size_of_files", + "num_files_from_base_backup", + "size_of_files_from_base_backup", + ) + print(table_info_title_line) + print(table_info_separator_line) + for table_name in sorted(tables): + table_info = tables[table_name] + print( + table_info_format.format( + table_name, table_info[0], table_info[1], table_info[2], table_info[3] + ) + ) + + +# Extracts a table name from a path inside a backup. +# For example, extracts 'default.tbl' from 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'. +def extract_table_name_from_path(path): + path = strip_shards_replicas_from_path(path) + if not path: + return None + if path.startswith("metadata/"): + path = path[len("metadata/") :] + sep = path.find("/") + if sep == -1: + return None + database_name = path[:sep] + path = path[sep + 1 :] + sep = path.find(".sql") + if sep == -1: + return None + table_name = path[:sep] + return database_name + "." + table_name + if path.startswith("data/"): + path = path[len("data/") :] + sep = path.find("/") + if sep == -1: + return None + database_name = path[:sep] + path = path[sep + 1 :] + sep = path.find("/") + if sep == -1: + return None + table_name = path[:sep] + return database_name + "." + table_name + return None + + +# Extracts a database name from a path inside a backup. +# For example, extracts 'default' from 'shards/1/replicas/1/data/default/tbl/all_0_0_0/data.bin'. +def extract_database_name_from_path(path): + path = strip_shards_replicas_from_path(path) + if not path: + return None + if path.startswith("metadata/"): + path = path[len("metadata/") :] + sep = path.find(".sql") + if sep == -1 or path.find("/") != -1: + return None + return path[:sep] + if path.startswith("data/"): + path = path[len("data/") :] + sep = path.find("/") + if sep == -1: + return None + return path[:sep] + return None + + +# Removes a prefix "shards//replicas//" from a path. +def strip_shards_replicas_from_path(path): + if path.startswith("shards"): + sep = path.find("/") + if sep == -1: + return None + sep = path.find("/", sep + 1) + if sep == -1: + return None + path = path[sep + 1 :] + if path.startswith("replicas"): + sep = path.find("/") + if sep == -1: + return None + sep = path.find("/", sep + 1) + if sep == -1: + return None + path = path[sep + 1 :] + return path + + +if __name__ == "__main__": + main() diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 80aeadd8738..484b6e3ca9d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -41,6 +41,7 @@ AsynchronousMetricsUpdateInterval AsynchronousReadWait Authenticator Authenticators +AutoFDO AutoML Autocompletion AvroConfluent @@ -98,6 +99,7 @@ BlockWriteOps BlockWriteTime Bool BrokenDistributedFilesToInsert +Bugfix BuildID BuilderBinAarch BuilderBinAmd @@ -146,6 +148,7 @@ ChannelID Cidr Ciphertext CityHash +ClickBench ClickCat ClickHouse ClickHouse's @@ -312,6 +315,7 @@ Greenwald HDDs HHMM HMAC +HNSW HSTS HTTPConnection HTTPThreads @@ -694,6 +698,7 @@ Promtail Protobuf ProtobufSingle ProxySQL +PyArrow PyCharm QEMU QTCreator @@ -918,6 +923,7 @@ URL's URLHash URLHierarchy URLPathHierarchy +USearch UUIDNumToString UUIDStringToNum UUIDs @@ -1083,6 +1089,7 @@ authenticators autocompletion autodetect autodetected +autogenerate autogenerated autogeneration autostart @@ -1092,6 +1099,7 @@ avro avx aws azureBlobStorage +azureBlobStorageCluster backend backoff backticks @@ -1157,6 +1165,7 @@ brotli bson bsoneachrow buffersize +bugfix buildId buildable builtins @@ -1398,6 +1407,7 @@ encodings encryptions endian endsWith +endsWithUTF enum enum's enums @@ -1457,6 +1467,7 @@ formatter freezed fromModifiedJulianDay fromModifiedJulianDayOrNull +fromUTCTimestamp fromUnixTimestamp fromUnixTimestampInJodaSyntax fsync @@ -1885,7 +1896,6 @@ overfitting packetpool packetsize pageviews -pandahouse parallelization parallelize parallelized @@ -2204,6 +2214,7 @@ src stacktrace stacktraces startsWith +startsWithUTF statbox stateful stddev @@ -2228,6 +2239,8 @@ strtoll strtoull struct structs +structureToCapnProtoSchema +structureToProtobufSchema studentTTest studentttest subBitmap @@ -2384,6 +2397,7 @@ toTimeZone toType toTypeName toUInt +toUTCTimestamp toUUID toUUIDOrDefault toUUIDOrNull @@ -2424,6 +2438,7 @@ tsv tui tumbleEnd tumbleStart +tupleConcat tupleDivide tupleDivideByNumber tupleElement @@ -2488,6 +2503,7 @@ uring url urlCluster urls +usearch userspace userver utils diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 1eabc65a10f..87796c35733 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,6 @@ +v23.7.4.5-stable 2023-08-08 +v23.7.3.14-stable 2023-08-05 +v23.7.2.25-stable 2023-08-03 v23.7.1.2470-stable 2023-07-27 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 @@ -11,6 +14,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.9.55-lts 2023-08-21 v23.3.8.21-lts 2023-07-13 v23.3.7.5-lts 2023-06-29 v23.3.6.7-lts 2023-06-28 diff --git a/utils/prepare-time-trace/prepare-time-trace.sh b/utils/prepare-time-trace/prepare-time-trace.sh new file mode 100755 index 00000000000..7cacdec8c94 --- /dev/null +++ b/utils/prepare-time-trace/prepare-time-trace.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# This scripts transforms the output of clang's -ftime-trace JSON files into a format to upload to ClickHouse + +# Example: +# mkdir time_trace +# utils/prepare-time-trace/prepare-time-trace.sh build time_trace + +# See also https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview + +< \"${OUTPUT_DIR}/\$\$\" +" + +# Now you can upload it as follows: + +#cat "$OUTPUT_DIR"/* | clickhouse-client --progress --query "INSERT INTO build_time_trace (extra_column_names, file, library, time, pid, tid, ph, ts, dur, cat, name, detail, count, avgMs, args_name) FORMAT JSONCompactEachRow" diff --git a/utils/wikistat-loader/CMakeLists.txt b/utils/wikistat-loader/CMakeLists.txt deleted file mode 100644 index fc5416dea2e..00000000000 --- a/utils/wikistat-loader/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (wikistat-loader main.cpp ${SRCS}) -target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io boost::program_options) diff --git a/utils/wikistat-loader/main.cpp b/utils/wikistat-loader/main.cpp deleted file mode 100644 index 493f1df05da..00000000000 --- a/utils/wikistat-loader/main.cpp +++ /dev/null @@ -1,225 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - - -/** Reads uncompressed wikistat data from stdin, - * and writes transformed data in tsv format, - * ready to be loaded into ClickHouse. - * - * Input data has format: - * - * aa Wikipedia 1 17224 - * aa.b Main_Page 2 21163 - * - * project, optional subproject, path, hits, total size in bytes. - */ - - -template -static void readString(std::string & s, DB::ReadBuffer & buf) -{ - s.clear(); - - while (!buf.eof()) - { - const char * next_pos; - - if (break_at_dot) - next_pos = find_first_symbols<' ', '\n', '.'>(buf.position(), buf.buffer().end()); - else - next_pos = find_first_symbols<' ', '\n'>(buf.position(), buf.buffer().end()); - - s.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == ' ' || *buf.position() == '\n' || (break_at_dot && *buf.position() == '.')) - return; - } -} - - -/** Reads path before whitespace and decodes %xx sequences (to more compact and handy representation), - * except %2F '/', %26 '&', %3D '=', %3F '?', %23 '#' (to not break structure of URL). - */ -static void readPath(std::string & s, DB::ReadBuffer & buf) -{ - s.clear(); - - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<' ', '\n', '%'>(buf.position(), buf.buffer().end()); - - s.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == ' ' || *buf.position() == '\n') - return; - - if (*buf.position() == '%') - { - ++buf.position(); - - char c1; - char c2; - - if (buf.eof() || *buf.position() == ' ') - break; - - DB::readChar(c1, buf); - - if (buf.eof() || *buf.position() == ' ') - break; - - DB::readChar(c2, buf); - - if ((c1 == '2' && (c2 == 'f' || c2 == '6' || c2 == '3' || c2 == 'F')) - || (c1 == '3' && (c2 == 'd' || c2 == 'f' || c2 == 'D' || c2 == 'F'))) - { - s += '%'; - s += c1; - s += c2; - } - else - s += static_cast(static_cast(unhex(c1)) * 16 + static_cast(unhex(c2))); - } - } -} - - -static void skipUntilNewline(DB::ReadBuffer & buf) -{ - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); - - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == '\n') - { - ++buf.position(); - return; - } - } -} - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - } -} - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("time", boost::program_options::value()->required(), - "time of data in YYYY-MM-DD hh:mm:ss form") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Reads uncompressed wikistat data from stdin and writes transformed data in tsv format." << std::endl; - std::cout << "Usage: " << argv[0] << " --time='YYYY-MM-DD hh:00:00' < in > out" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - std::string time_str = options.at("time").as(); - LocalDateTime time(time_str); - LocalDate date(time_str); - - DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); - DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); - - std::string project; - std::string subproject; - std::string path; - UInt64 hits = 0; - UInt64 size = 0; - - size_t row_num = 0; - while (!in.eof()) - { - try - { - ++row_num; - readString(project, in); - - if (in.eof()) - break; - - if (*in.position() == '.') - readString(subproject, in); - else - subproject.clear(); - - DB::assertChar(' ', in); - readPath(path, in); - DB::assertChar(' ', in); - DB::readIntText(hits, in); - DB::assertChar(' ', in); - DB::readIntText(size, in); - DB::assertChar('\n', in); - } - catch (const DB::Exception & e) - { - /// Sometimes, input data has errors. For example, look at first lines in pagecounts-20130210-130000.gz - /// To save rest of data, just skip lines with errors. - if (e.code() == DB::ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED) - { - std::cerr << "At row " << row_num << ": " << DB::getCurrentExceptionMessage(false) << '\n'; - skipUntilNewline(in); - continue; - } - else - throw; - } - - DB::writeText(date, out); - DB::writeChar('\t', out); - DB::writeText(time, out); - DB::writeChar('\t', out); - DB::writeText(project, out); - DB::writeChar('\t', out); - DB::writeText(subproject, out); - DB::writeChar('\t', out); - DB::writeText(path, out); - DB::writeChar('\t', out); - DB::writeText(hits, out); - DB::writeChar('\t', out); - DB::writeText(size, out); - DB::writeChar('\n', out); - } - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -}