diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 110c06631c7..a324d20abc9 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -79,7 +79,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -98,12 +98,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -741,7 +772,8 @@ jobs: - FunctionalStatefulTestDebug - StressTestTsan - IntegrationTestsRelease - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7e045992dee..b70fe256833 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -110,7 +110,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -129,12 +129,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + 
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -3124,7 +3155,8 @@ jobs: - PerformanceComparisonX86-1 - PerformanceComparisonX86-2 - PerformanceComparisonX86-3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - ASTFuzzerTestDebug - ASTFuzzerTestAsan - ASTFuzzerTestTsan diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d410f833c5..ff98739db00 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -174,7 +174,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -193,12 +193,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -4792,7 +4823,8 @@ jobs: - UnitTestsMsan - UnitTestsUBsan - UnitTestsReleaseClang - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - IntegrationTestsFlakyCheck - SQLancerTestRelease - SQLancerTestDebug diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d2a99c2106..74ec1163cc9 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -71,7 +71,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -90,12 +90,43 @@ 
jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -1947,7 +1978,8 @@ jobs: - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h index de669aa90dd..dcb813b75bc 100644 --- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h +++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h @@ -90,6 +90,9 @@ namespace MongoDB Poco::Net::SocketAddress address() const; /// Returns the address of the MongoDB server. + + const std::string & uri() const; + /// Returns the uri on which the connection was made. void connect(const std::string & hostAndPort); /// Connects to the given MongoDB server. 
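The `uri()` accessor added in the hunk above pairs with the multi-host `connect(uri, socketFactory)` overload rewritten in `Connection.cpp` later in this diff: after connecting, it reports which of the listed hosts was actually chosen. A minimal usage sketch (assuming a reachable MongoDB replica set; the hosts, credentials and database name below are placeholders, not values from this diff):

```cpp
#include <iostream>
#include <Poco/MongoDB/Connection.h>

int main()
{
    Poco::MongoDB::Connection connection;
    Poco::MongoDB::Connection::SocketFactory socket_factory;

    // Comma-separated hosts; the connection picks the replica matching readPreference.
    // Host names, credentials and database are placeholders for illustration only.
    connection.connect("mongodb://user:password@host1:27017,host2:27017/db?readPreference=primary",
                       socket_factory);

    // The new accessor reports which of the listed hosts was actually used.
    std::cout << "connected via: " << connection.uri() << "\n";
    return 0;
}
```

This mirrors how `MongoDBDictionarySource` is changed further down in this diff: it now connects first and then parses `connection->uri()` instead of the raw input URI.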
@@ -148,6 +151,7 @@ namespace MongoDB private: Poco::Net::SocketAddress _address; Poco::Net::StreamSocket _socket; + std::string _uri; }; @@ -158,6 +162,10 @@ namespace MongoDB { return _address; } + inline const std::string & Connection::uri() const + { + return _uri; + } } diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp index 56bb192cec2..38c31d2250a 100644 --- a/base/poco/MongoDB/src/Connection.cpp +++ b/base/poco/MongoDB/src/Connection.cpp @@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket) void Connection::connect(const std::string& uri, SocketFactory& socketFactory) { - Poco::URI theURI(uri); - if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri); + std::vector strAddresses; + std::string newURI; - std::string userInfo = theURI.getUserInfo(); - std::string host = theURI.getHost(); - Poco::UInt16 port = theURI.getPort(); - if (port == 0) port = 27017; + if (uri.find(',') != std::string::npos) + { + size_t pos; + size_t head = 0; + if ((pos = uri.find("@")) != std::string::npos) + { + head = pos + 1; + } + else if ((pos = uri.find("://")) != std::string::npos) + { + head = pos + 3; + } - std::string databaseName = theURI.getPath(); - if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1); - if (databaseName.empty()) databaseName = "admin"; + std::string tempstr; + std::string::const_iterator it = uri.begin(); + it += head; + size_t tail = head; + for (;it != uri.end() && *it != '?' && *it != '/'; ++it) + { + tempstr += *it; + tail++; + } - bool ssl = false; - Poco::Timespan connectTimeout; - Poco::Timespan socketTimeout; - std::string authMechanism = Database::AUTH_SCRAM_SHA1; + it = tempstr.begin(); + std::string token; + for (;it != tempstr.end(); ++it) + { + if (*it == ',') + { + newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); + strAddresses.push_back(newURI); + token = ""; + } + else + { + token += *it; + } + } + newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); + strAddresses.push_back(newURI); + } + else + { + strAddresses.push_back(uri); + } - Poco::URI::QueryParameters params = theURI.getQueryParameters(); - for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it) - { - if (it->first == "ssl") - { - ssl = (it->second == "true"); - } - else if (it->first == "connectTimeoutMS") - { - connectTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "socketTimeoutMS") - { - socketTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "authMechanism") - { - authMechanism = it->second; - } - } + newURI = strAddresses.front(); + Poco::URI theURI(newURI); + if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri); - connect(socketFactory.createSocket(host, port, connectTimeout, ssl)); + std::string userInfo = theURI.getUserInfo(); + std::string databaseName = theURI.getPath(); + if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1); + if (databaseName.empty()) databaseName = "admin"; - if (socketTimeout > 0) - { - _socket.setSendTimeout(socketTimeout); - _socket.setReceiveTimeout(socketTimeout); - } + bool ssl = false; + Poco::Timespan connectTimeout; + Poco::Timespan socketTimeout; + std::string authMechanism = Database::AUTH_SCRAM_SHA1; + std::string readPreference="primary"; - if (!userInfo.empty()) - { - std::string username; - std::string password; - 
std::string::size_type pos = userInfo.find(':'); - if (pos != std::string::npos) - { - username.assign(userInfo, 0, pos++); - password.assign(userInfo, pos, userInfo.size() - pos); - } - else username = userInfo; + Poco::URI::QueryParameters params = theURI.getQueryParameters(); + for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it) + { + if (it->first == "ssl") + { + ssl = (it->second == "true"); + } + else if (it->first == "connectTimeoutMS") + { + connectTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); + } + else if (it->first == "socketTimeoutMS") + { + socketTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); + } + else if (it->first == "authMechanism") + { + authMechanism = it->second; + } + else if (it->first == "readPreference") + { + readPreference= it->second; + } + } - Database database(databaseName); - if (!database.authenticate(*this, username, password, authMechanism)) - throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username)); - } + for (std::vector::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it) + { + newURI = *it; + theURI = Poco::URI(newURI); + + std::string host = theURI.getHost(); + Poco::UInt16 port = theURI.getPort(); + if (port == 0) port = 27017; + + connect(socketFactory.createSocket(host, port, connectTimeout, ssl)); + _uri = newURI; + if (socketTimeout > 0) + { + _socket.setSendTimeout(socketTimeout); + _socket.setReceiveTimeout(socketTimeout); + } + if (strAddresses.size() > 1) + { + Poco::MongoDB::QueryRequest request("admin.$cmd"); + request.setNumberToReturn(1); + request.selector().add("isMaster", 1); + Poco::MongoDB::ResponseMessage response; + + sendRequest(request, response); + _uri = newURI; + if (!response.documents().empty()) + { + Poco::MongoDB::Document::Ptr doc = response.documents()[0]; + if (doc->get("ismaster") && readPreference == "primary") + { + break; + } + else if (!doc->get("ismaster") && readPreference == "secondary") + { + break; + } + else if (it + 1 == strAddresses.cend()) + { + throw Poco::URISyntaxException(uri); + } + } + } + } + if (!userInfo.empty()) + { + std::string username; + std::string password; + std::string::size_type pos = userInfo.find(':'); + if (pos != std::string::npos) + { + username.assign(userInfo, 0, pos++); + password.assign(userInfo, pos, userInfo.size() - pos); + } + else username = userInfo; + + Database database(databaseName); + + if (!database.authenticate(*this, username, password, authMechanism)) + throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username)); + } } diff --git a/docs/changelogs/v22.12.4.76-stable.md b/docs/changelogs/v22.12.4.76-stable.md new file mode 100644 index 00000000000..79569ff841e --- /dev/null +++ b/docs/changelogs/v22.12.4.76-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02) + +#### Performance Improvement +* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). 
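The `c-ares` entry above ([#45629]) turns on a detail of `poll()`: a negative return value with `errno == EINTR` only means the wait was interrupted by a signal, not that it failed. A standalone sketch of the retry pattern that the fix describes (illustrative only; this is not the actual ClickHouse resolver code):

```cpp
#include <cerrno>
#include <cstdio>
#include <poll.h>

// Wait for readability on `fd`, retrying when poll() is interrupted by a signal.
int wait_readable(int fd, int timeout_ms)
{
    pollfd pfd{fd, POLLIN, 0};
    while (true)
    {
        int rc = poll(&pfd, 1, timeout_ms);
        if (rc >= 0)
            return rc;      // 0 = timeout, >0 = ready
        if (errno == EINTR)
            continue;       // interrupted by a signal: not an error, retry
        return -1;          // real failure
    }
}

int main()
{
    // Example: wait up to one second for stdin (fd 0) to become readable.
    int rc = wait_readable(0, 1000);
    std::printf("poll result: %d\n", rc);
    return 0;
}
```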
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.8.14.53-lts.md b/docs/changelogs/v22.8.14.53-lts.md new file mode 100644 index 00000000000..5978080fa3a --- /dev/null +++ b/docs/changelogs/v22.8.14.53-lts.md @@ -0,0 +1,40 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18) + +#### Performance Improvement +* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. 
[#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement +* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.1.4.58-stable.md b/docs/changelogs/v23.1.4.58-stable.md new file mode 100644 index 00000000000..d1ffe87f58e --- /dev/null +++ b/docs/changelogs/v23.1.4.58-stable.md @@ -0,0 +1,47 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce) + +#### Performance Improvement +* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. 
[#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Improvement +* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). +* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Build/Testing/Packaging Improvement +* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. 
[#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.2.2.20-stable.md b/docs/changelogs/v23.2.2.20-stable.md new file mode 100644 index 00000000000..60aeaa66cbf --- /dev/null +++ b/docs/changelogs/v23.2.2.20-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a) + +#### Improvement +* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Bug Fix +* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)). + +#### Build/Testing/Packaging Improvement +* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)). 
+* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 50b338844df..dd46b294ddd 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded. -IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. +IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings. For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index ef422632d3e..e6134043b8e 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -125,6 +125,10 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 2. Create a table with the desired structure. 3. Create a materialized view that converts data from the engine and puts it into a previously created table. +:::info +Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level. 
+::: + When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without). diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 3843ca0fc36..47058a28d12 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -280,12 +280,20 @@ SELECT ## toIPv4OrDefault(string) -Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4). ## toIPv4OrNull(string) Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. +## toIPv6OrDefault(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6). + +## toIPv6OrNull(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. + ## toIPv6 Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 54977e1b0ab..b3d5060e13c 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -124,6 +124,10 @@ If the data type and default expression are defined explicitly, this expression Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. +:::info +Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level, see [Kafka Engine](../../../engines/table-engines/integrations/kafka.md#description). 
+::: + ### DEFAULT `DEFAULT expr` diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9b01e6920a4..761335fb707 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -128,6 +128,7 @@ if (BUILD_STANDALONE_KEEPER) ch_contrib::lz4 ch_contrib::zstd ch_contrib::cityhash + ch_contrib::jemalloc common ch_contrib::double_conversion ch_contrib::dragonbox_to_chars pcg_random diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 1fbdd857379..b637bdea835 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -358,12 +358,27 @@ ZooKeeper::ZooKeeper( if (!args.auth_scheme.empty()) sendAuth(args.auth_scheme, args.identity); - send_thread = ThreadFromGlobalPool([this] { sendThread(); }); - receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); + try + { + send_thread = ThreadFromGlobalPool([this] { sendThread(); }); + receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); - initApiVersion(); + initApiVersion(); - ProfileEvents::increment(ProfileEvents::ZooKeeperInit); + ProfileEvents::increment(ProfileEvents::ZooKeeperInit); + } + catch (...) + { + tryLogCurrentException(log, "Failed to connect to ZooKeeper"); + + if (send_thread.joinable()) + send_thread.join(); + + if (receive_thread.joinable()) + receive_thread.join(); + + throw; + } } diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index f5c79d3be7a..5e1ac1e2d7f 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 6157daad1cd..8a7fdb82fb7 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -148,6 +148,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr recalculate_command = std::make_shared(keeper_dispatcher); factory.registerCommand(recalculate_command); + FourLetterCommandPtr clean_resources_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(clean_resources_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -524,4 +527,10 @@ String RecalculateCommand::run() return "ok"; } +String CleanResourcesCommand::run() +{ + keeper_dispatcher.cleanResources(); + return "ok"; +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index e1fe0333081..c1a91303c05 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -377,7 +377,6 @@ struct RequestLeaderCommand : public IFourLetterCommand ~RequestLeaderCommand() override = default; }; -/// Request to be leader. 
struct RecalculateCommand : public IFourLetterCommand { explicit RecalculateCommand(KeeperDispatcher & keeper_dispatcher_) @@ -390,4 +389,16 @@ struct RecalculateCommand : public IFourLetterCommand ~RecalculateCommand() override = default; }; +struct CleanResourcesCommand : public IFourLetterCommand +{ + explicit CleanResourcesCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "clrs"; } + String run() override; + ~CleanResourcesCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 06c693e45be..a6d16334924 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include @@ -17,12 +17,26 @@ #include #include +#if USE_JEMALLOC +# include + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +#endif + namespace CurrentMetrics { extern const Metric KeeperAliveConnections; extern const Metric KeeperOutstandingRequets; } +namespace ProfileEvents +{ + extern const Event MemoryAllocatorPurge; + extern const Event MemoryAllocatorPurgeTimeMicroseconds; +} + namespace fs = std::filesystem; namespace DB @@ -753,4 +767,15 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const return result; } +void KeeperDispatcher::cleanResources() +{ +#if USE_JEMALLOC + LOG_TRACE(&Poco::Logger::get("KeeperDispatcher"), "Purging unused memory"); + Stopwatch watch; + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); +#endif +} + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 90965d0934e..9371d2fbbac 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -230,6 +230,8 @@ public: { return server->recalculateStorageStats(); } + + static void cleanResources(); }; } diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index bec566c29a6..a9555a94304 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -114,7 +114,11 @@ MongoDBDictionarySource::MongoDBDictionarySource( { if (!uri.empty()) { - Poco::URI poco_uri(uri); + // Connect with URI. + Poco::MongoDB::Connection::SocketFactory socket_factory; + connection->connect(uri, socket_factory); + + Poco::URI poco_uri(connection->uri()); // Parse database from URI. This is required for correctness -- the // cursor is created using database name and collection name, so we have @@ -134,10 +138,6 @@ MongoDBDictionarySource::MongoDBDictionarySource( { user.resize(separator); } - - // Connect with URI. 
- Poco::MongoDB::Connection::SocketFactory socket_factory; - connection->connect(uri, socket_factory); } else { diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 5deb9ab11b5..637acff7b95 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -6,9 +6,13 @@ #include #include #include +#include +#include #include #include #include +#include +#include namespace DB { @@ -18,43 +22,85 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) +namespace { - /// We need a unique name for a created custom disk, but it needs to be the same - /// after table is reattached or server is restarted, so take a hash of the disk - /// configuration serialized ast as a disk name suffix. - auto disk_setting_string = serializeAST(function, true); - auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX - + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - - auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - const auto * function_args_expr = assert_cast(function.arguments.get()); - const auto & function_args = function_args_expr->children; - auto config = getDiskConfigurationFromAST(disk_name, function_args, context); - auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); - /// Mark that disk can be used without storage policy. - disk->markDiskAsCustom(); - return disk; - }); - - if (!result_disk->isRemote()) + std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { - static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; - auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, ""); + /// We need a unique name for a created custom disk, but it needs to be the same + /// after table is reattached or server is restarted, so take a hash of the disk + /// configuration serialized ast as a disk name suffix. + auto disk_setting_string = serializeAST(function, true); + auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - if (disk_path_expected_prefix.empty()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Base path for custom local disks must be defined in config file by `{}`", - custom_disks_base_dir_in_config); + auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { + const auto * function_args_expr = assert_cast(function.arguments.get()); + const auto & function_args = function_args_expr->children; + auto config = getDiskConfigurationFromAST(disk_name, function_args, context); + auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); + /// Mark that disk can be used without storage policy. 
+ disk->markDiskAsCustom(); + return disk; + }); - if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path of the custom local disk must be inside `{}` directory", - disk_path_expected_prefix); + if (!result_disk->isRemote()) + { + static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; + auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, ""); + + if (disk_path_expected_prefix.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Base path for custom local disks must be defined in config file by `{}`", + custom_disks_base_dir_in_config); + + if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path of the custom local disk must be inside `{}` directory", + disk_path_expected_prefix); + } + + return disk_name; } + class DiskConfigurationFlattener + { + public: + struct Data + { + ContextPtr context; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data & data) + { + if (isDiskFunction(ast)) + { + auto disk_name = getOrCreateDiskFromDiskAST(*ast->as(), data.context); + ast = std::make_shared(disk_name); + } + } + }; + + /// Visits children first. + using FlattenDiskConfigurationVisitor = InDepthNodeVisitor; +} + + +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) +{ + if (!isDiskFunction(disk_function)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); + + auto ast = disk_function->clone(); + + FlattenDiskConfigurationVisitor::Data data{context}; + FlattenDiskConfigurationVisitor{data}.visit(ast); + + auto disk_name = assert_cast(*ast).value.get(); + LOG_TRACE(&Poco::Logger::get("getOrCreateDiskFromDiskAST"), "Result disk name: {}", disk_name); return disk_name; } diff --git a/src/Disks/getOrCreateDiskFromAST.h b/src/Disks/getOrCreateDiskFromAST.h index 7c64707b0bd..0195f575278 100644 --- a/src/Disks/getOrCreateDiskFromAST.h +++ b/src/Disks/getOrCreateDiskFromAST.h @@ -13,6 +13,6 @@ class ASTFunction; * add it to DiskSelector by a unique (but always the same for given configuration) disk name * and return this name. */ -std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context); +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context); } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 40698386ccb..3330159aff5 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -161,6 +161,8 @@ public: if (curr_process.processed) continue; + LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (synchronously)", curr_process.query_id); + auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true); if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent) @@ -226,6 +228,8 @@ BlockIO InterpreterKillQueryQuery::execute() MutableColumns res_columns = header.cloneEmptyColumns(); for (const auto & query_desc : queries_to_stop) { + if (!query.test) + LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (asynchronously)", query_desc.query_id); auto code = (query.test) ? 
CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true); insertResultRow(query_desc.source_num, code, processes_block, header, res_columns); } diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index 3cd10c1cf80..a81bf45a8be 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -31,42 +32,64 @@ bool FieldFromASTImpl::isSecret() const return isDiskFunction(ast); } +class DiskConfigurationMasker +{ +public: + struct Data {}; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data &) + { + if (isDiskFunction(ast)) + { + const auto & disk_function = assert_cast(*ast); + const auto * disk_function_args_expr = assert_cast(disk_function.arguments.get()); + const auto & disk_function_args = disk_function_args_expr->children; + + auto is_secret_arg = [](const std::string & arg_name) + { + /// We allow to not hide type of the disk, e.g. disk(type = s3, ...) + /// and also nested disk, e.g. disk = 'disk_name' + return arg_name != "type" && arg_name != "disk"; + }; + + for (const auto & arg : disk_function_args) + { + auto * setting_function = arg->as(); + if (!setting_function || setting_function->name != "equals") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function"); + + auto * function_args_expr = assert_cast(setting_function->arguments.get()); + if (!function_args_expr) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments"); + + auto & function_args = function_args_expr->children; + if (function_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments"); + + auto * key_identifier = function_args[0]->as(); + if (!key_identifier) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier"); + + const std::string & key = key_identifier->name(); + if (is_secret_arg(key)) + function_args[1] = std::make_shared("[HIDDEN]"); + } + } + } +}; + +/// Visits children first. 
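The visitor declared above walks a nested disk(...) definition and hides every argument value except the disk type and nested disk definitions, which is what the SHOW CREATE TABLE output in the new integration test expects. A small Python sketch of that masking rule, modelling the arguments as a nested dict purely for illustration:

def mask_disk_config(args):
    masked = {}
    for key, value in args.items():
        if key == "type":
            masked[key] = value  # the disk type stays visible, e.g. disk(type = s3, ...)
        elif key == "disk":
            # nested disk(...) definitions are visited recursively; a plain disk name stays as-is
            masked[key] = mask_disk_config(value) if isinstance(value, dict) else value
        else:
            masked[key] = "[HIDDEN]"
    return masked

cfg = {"type": "cache", "max_size": "1Gi", "path": "/var/lib/clickhouse/custom_disk_cache/",
       "disk": {"type": "s3", "endpoint": "http://minio1:9001/root/data/",
                "access_key_id": "minio", "secret_access_key": "minio123"}}
masked = mask_disk_config(cfg)
assert masked["max_size"] == "[HIDDEN]" and masked["disk"]["type"] == "s3"
assert masked["disk"]["secret_access_key"] == "[HIDDEN]"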
+using HideDiskConfigurationVisitor = InDepthNodeVisitor; + String FieldFromASTImpl::toString(bool show_secrets) const { if (!show_secrets && isDiskFunction(ast)) { auto hidden = ast->clone(); - const auto & disk_function = assert_cast(*hidden); - const auto * disk_function_args_expr = assert_cast(disk_function.arguments.get()); - const auto & disk_function_args = disk_function_args_expr->children; - - auto is_secret_arg = [](const std::string & arg_name) - { - return arg_name != "type"; - }; - - for (const auto & arg : disk_function_args) - { - auto * setting_function = arg->as(); - if (!setting_function || setting_function->name != "equals") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function"); - - auto * function_args_expr = assert_cast(setting_function->arguments.get()); - if (!function_args_expr) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments"); - - auto & function_args = function_args_expr->children; - if (function_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments"); - - auto * key_identifier = function_args[0]->as(); - if (!key_identifier) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier"); - - const std::string & key = key_identifier->name(); - if (is_secret_arg(key)) - function_args[1] = std::make_shared("[HIDDEN]"); - } + HideDiskConfigurationVisitor::Data data{}; + HideDiskConfigurationVisitor{data}.visit(hidden); return serializeAST(*hidden); } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d6235208063..6f818e2c8f7 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include @@ -19,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -27,6 +31,7 @@ #include #include #include +#include #include #include @@ -40,6 +45,10 @@ #include #include +#include +#include +#include + namespace DB { @@ -143,6 +152,100 @@ NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage return result; } +bool applyTrivialCountIfPossible( + QueryPlan & query_plan, + const TableNode & table_node, + const QueryTreeNodePtr & query_tree, + const ContextPtr & query_context, + const Names & columns_names) +{ + const auto & settings = query_context->getSettingsRef(); + if (!settings.optimize_trivial_count_query) + return false; + + /// can't apply if FINAL + if (table_node.getTableExpressionModifiers().has_value() && table_node.getTableExpressionModifiers()->hasFinal()) + return false; + + auto & main_query_node = query_tree->as(); + if (main_query_node.hasGroupBy()) + return false; + + const auto & storage = table_node.getStorage(); + if (!storage || storage->hasLightweightDeletedMask()) + return false; + + if (settings.max_parallel_replicas > 1 || settings.allow_experimental_query_deduplication + || settings.empty_result_for_aggregation_by_empty_set) + return false; + + QueryTreeNodes aggregates = collectAggregateFunctionNodes(query_tree); + if (aggregates.size() != 1) + return false; + + const auto & function_node = aggregates.front().get()->as(); + chassert(function_node.getAggregateFunction() != nullptr); + const auto * count_func = typeid_cast(function_node.getAggregateFunction().get()); + if (!count_func) + return false; + + /// get number of rows + std::optional num_rows{}; + /// Transaction check here is necessary because + /// MergeTree maintains total count for all parts in Active 
state and it simply returns that number for trivial select count() from table query. + /// But if we have current transaction, then we should return number of rows in current snapshot (that may include parts in Outdated state), + /// so we have to use totalRowsByPartitionPredicate() instead of totalRows even for trivial query + /// See https://github.com/ClickHouse/ClickHouse/pull/24258/files#r828182031 + if (!main_query_node.hasPrewhere() && !main_query_node.hasWhere() && !query_context->getCurrentTransaction()) + { + num_rows = storage->totalRows(settings); + } + // TODO: + // else // It's possible to optimize count() given only partition predicates + // { + // SelectQueryInfo temp_query_info; + // temp_query_info.query = query_ptr; + // temp_query_info.syntax_analyzer_result = syntax_analyzer_result; + // temp_query_info.prepared_sets = query_analyzer->getPreparedSets(); + // num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, context); + // } + + if (!num_rows) + return false; + + /// set aggregation state + const AggregateFunctionCount & agg_count = *count_func; + std::vector state(agg_count.sizeOfData()); + AggregateDataPtr place = state.data(); + agg_count.create(place); + SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); + agg_count.set(place, num_rows.value()); + + auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction()); + column->insertFrom(place); + + /// get count() argument type + DataTypes argument_types; + argument_types.reserve(columns_names.size()); + { + const Block source_header = table_node.getStorageSnapshot()->getSampleBlockForColumns(columns_names); + for (const auto & column_name : columns_names) + argument_types.push_back(source_header.getByName(column_name).type); + } + + Block block_with_count{ + {std::move(column), + std::make_shared(function_node.getAggregateFunction(), argument_types, Array{}), + columns_names.front()}}; + + auto source = std::make_shared(block_with_count); + auto prepared_count = std::make_unique(Pipe(std::move(source))); + prepared_count->setStepDescription("Optimized trivial count"); + query_plan.addStep(std::move(prepared_count)); + + return true; +} + JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression, const SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, @@ -306,32 +409,43 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } } - if (!select_query_options.only_analyze) - { - from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); - storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); - } + /// Apply trivial_count optimization if possible + bool is_trivial_count_applied = !select_query_options.only_analyze && is_single_table_expression && table_node && select_query_info.has_aggregates + && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), columns_names); - if (query_plan.isInitialized()) + if (is_trivial_count_applied) { - /** Specify the number of threads only if it wasn't specified in storage. - * - * But in case of remote query and prefer_localhost_replica=1 (default) - * The inner local query (that is done in the same process, without - * network interaction), it will setMaxThreads earlier and distributed - * query will not update it. 
- */ - if (!query_plan.getMaxThreads() || is_remote) - query_plan.setMaxThreads(max_threads_execute_query); + from_stage = QueryProcessingStage::WithMergeableState; } else { - /// Create step which reads from empty source if storage has no data - auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names); - Pipe pipe(std::make_shared(source_header)); - auto read_from_pipe = std::make_unique(std::move(pipe)); - read_from_pipe->setStepDescription("Read from NullSource"); - query_plan.addStep(std::move(read_from_pipe)); + if (!select_query_options.only_analyze) + { + from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + } + + if (query_plan.isInitialized()) + { + /** Specify the number of threads only if it wasn't specified in storage. + * + * But in case of remote query and prefer_localhost_replica=1 (default) + * The inner local query (that is done in the same process, without + * network interaction), it will setMaxThreads earlier and distributed + * query will not update it. + */ + if (!query_plan.getMaxThreads() || is_remote) + query_plan.setMaxThreads(max_threads_execute_query); + } + else + { + /// Create step which reads from empty source if storage has no data. + auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names); + Pipe pipe(std::make_shared(source_header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource"); + query_plan.addStep(std::move(read_from_pipe)); + } } } else if (query_node || union_node) diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 70815bb8b3b..4478f1548a4 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -187,7 +187,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk) if (!is_pushed) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted"); } void PushingAsyncPipelineExecutor::push(Block block) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 6349c559c26..27b3712ffd3 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1224,14 +1224,7 @@ void TCPHandler::receiveHello() session = makeSession(); auto & client_info = session->getClientInfo(); - - /// Extract the last entry from comma separated list of forwarded_for addresses. - /// Only the last proxy can be trusted (if any). - String forwarded_address = client_info.getLastForwardedFor(); - if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) - session->authenticate(user, password, Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port())); - else - session->authenticate(user, password, socket().peerAddress()); + session->authenticate(user, password, getClientAddress(client_info)); } void TCPHandler::receiveAddendum() @@ -1522,11 +1515,16 @@ void TCPHandler::receiveQuery() /// so we should not rely on that. However, in this particular case we got client_info from other clickhouse-server, so it's ok. 
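The TCPHandler change above centralizes the forwarded-address logic in getClientAddress(): only the last entry of the comma-separated forwarded_for list is trusted, and only when the server enables auth_use_forwarded_address. A hedged Python sketch of that rule, with a plain (host, port) tuple standing in for Poco::Net::SocketAddress:

def client_address(forwarded_for, peer_host, peer_port, use_forwarded_address):
    # Only the last entry of the comma-separated forwarded_for list can be trusted (if any),
    # and only when the server opts in via the auth_use_forwarded_address setting.
    last_hop = forwarded_for.split(",")[-1].strip() if forwarded_for else ""
    if last_hop and use_forwarded_address:
        return (last_hop, peer_port)  # authenticate against the forwarded client address
    return (peer_host, peer_port)     # otherwise fall back to the socket peer address

assert client_address("10.0.0.5, 10.0.0.9", "127.0.0.1", 9000, True) == ("10.0.0.9", 9000)
assert client_address("10.0.0.5, 10.0.0.9", "127.0.0.1", 9000, False) == ("127.0.0.1", 9000)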
if (client_info.initial_user.empty()) { - LOG_DEBUG(log, "User (no user, interserver mode)"); + LOG_DEBUG(log, "User (no user, interserver mode) (client: {})", getClientAddress(client_info).toString()); } else { - LOG_DEBUG(log, "User (initial, interserver mode): {}", client_info.initial_user); + LOG_DEBUG(log, "User (initial, interserver mode): {} (client: {})", client_info.initial_user, getClientAddress(client_info).toString()); + /// In case of inter-server mode authorization is done with the + /// initial address of the client, not the real address from which + /// the query was come, since the real address is the address of + /// the initiator server, while we are interested in client's + /// address. session->authenticate(AlwaysAllowCredentials{client_info.initial_user}, client_info.initial_address); } #else @@ -2012,4 +2010,15 @@ void TCPHandler::run() } } +Poco::Net::SocketAddress TCPHandler::getClientAddress(const ClientInfo & client_info) +{ + /// Extract the last entry from comma separated list of forwarded_for addresses. + /// Only the last proxy can be trusted (if any). + String forwarded_address = client_info.getLastForwardedFor(); + if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) + return Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port()); + else + return socket().peerAddress(); +} + } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index f06b0b060b3..e3673b213d5 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -273,6 +273,8 @@ private: /// This function is called from different threads. void updateProgress(const Progress & value); + + Poco::Net::SocketAddress getClientAddress(const ClientInfo & client_info); }; } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index d401840eec7..fa39e304925 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -383,6 +383,15 @@ NamesAndTypesList ColumnsDescription::getEphemeral() const return ret; } +NamesAndTypesList ColumnsDescription::getWithDefaultExpression() const +{ + NamesAndTypesList ret; + for (const auto & col : columns) + if (col.default_desc.expression) + ret.emplace_back(col.name, col.type); + return ret; +} + NamesAndTypesList ColumnsDescription::getAll() const { NamesAndTypesList ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 4f874f4b850..36109392ab6 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -132,6 +132,9 @@ public: NamesAndTypesList getInsertable() const; /// ordinary + ephemeral NamesAndTypesList getAliases() const; NamesAndTypesList getEphemeral() const; + // Columns with preset default expression. + // For example from `CREATE TABLE` statement + NamesAndTypesList getWithDefaultExpression() const; NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + ephemeral /// Returns .size0/.null/... 
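getWithDefaultExpression() added above exists to support the new Kafka engine check further down: a Kafka table may only have ordinary columns without DEFAULT expressions. A Python sketch of that validation, with an assumed Column tuple layout standing in for ColumnsDescription:

from collections import namedtuple

# kind is 'ordinary', 'materialized', 'ephemeral' or 'alias'; default_expression mimics default_desc.expression
Column = namedtuple("Column", ["name", "kind", "default_expression"])

def validate_kafka_columns(columns):
    ordinary = [c for c in columns if c.kind == "ordinary"]
    with_default = [c for c in columns if c.default_expression is not None]
    if len(ordinary) != len(columns) or with_default:
        raise ValueError("KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS "
                         "expressions for columns")

validate_kafka_columns([Column("a", "ordinary", None)])  # plain columns are accepted
try:
    validate_kafka_columns([Column("a", "ordinary", None), Column("b", "ordinary", "0")])
except ValueError as err:
    print(err)  # rejected: column `b` carries a DEFAULT expression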
diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 2e07aceeaa9..a93ac248c98 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -94,14 +94,15 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical()); result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true); - Block block_without_columns = result.expression->getSampleBlock(); + result.sample_block = result.expression->getSampleBlock(); - for (size_t i = 0; i < block_without_columns.columns(); ++i) + for (auto & elem : result.sample_block) { - const auto & column = block_without_columns.getByPosition(i); - result.column_names.emplace_back(column.name); - result.data_types.emplace_back(column.type); - result.sample_block.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); + if (!elem.column) + elem.column = elem.type->createColumn(); + + result.column_names.push_back(elem.name); + result.data_types.push_back(elem.type); } const auto & definition_arguments = index_definition->type->arguments; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 7b97273d8af..2afdc0dda8a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -959,6 +959,11 @@ void registerStorageKafka(StorageFactory & factory) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); } + if (args.columns.getOrdinary() != args.columns.getAll() || !args.columns.getWithDefaultExpression().empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns. " + "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); + } return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1da99cb4117..73007e3f178 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -525,7 +525,6 @@ void MergeTreeData::checkProperties( for (const auto & index : new_metadata.secondary_indices) { - MergeTreeIndexFactory::instance().validate(index, attach); if (indices_names.find(index.name) != indices_names.end()) diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 2be9ecd8de3..6ae96d00171 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -35,6 +35,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( { auto it = creators.find(index.type); if (it == creators.end()) + { throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown Index type '{}'. 
Available index types: {}", index.type, std::accumulate(creators.cbegin(), creators.cend(), std::string{}, @@ -46,6 +47,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( return left + ", " + right.first; }) ); + } return it->second(index); } @@ -61,8 +63,31 @@ MergeTreeIndices MergeTreeIndexFactory::getMany(const std::vectorhasArrayJoin()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain array joins", index.name); + + try + { + index.expression->assertDeterministic(); + } + catch (Exception & e) + { + e.addMessage(fmt::format("for secondary index '{}'", index.name)); + throw; + } + + for (const auto & elem : index.sample_block) + if (elem.column && (isColumnConst(*elem.column) || elem.column->isDummy())) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain constants", index.name); + } + auto it = validators.find(index.type); if (it == validators.end()) + { throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown Index type '{}'. Available index types: {}", index.type, std::accumulate( @@ -77,6 +102,7 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach return left + ", " + right.first; }) ); + } it->second(index, attach); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e951b8f54cf..479e50fdebb 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -64,8 +64,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte auto ast = dynamic_cast(custom.getImpl()).ast; if (ast && isDiskFunction(ast)) { - const auto & ast_function = assert_cast(*ast); - auto disk_name = getOrCreateDiskFromDiskAST(ast_function, context); + auto disk_name = getOrCreateDiskFromDiskAST(ast, context); LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name); value = disk_name; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 54ae8aa5a7b..6c6ff30fd04 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8434,7 +8434,11 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else if (error_code == Coordination::Error::ZNONODE) { - LOG_TRACE(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before)", zookeeper_part_uniq_node, part_name); + /// We don't know what to do, because this part can be mutation part + /// with hardlinked columns. Since we don't have this information (about blobs not to remove) + /// we refuse to remove blobs. 
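Stepping back to the index-factory validation added above: for non-ATTACH DDL it now rejects skip indexes whose expression uses array joins or non-deterministic functions, or evaluates to a constant (the behaviour exercised by the new 02670_constant_skip_index test below). A loose Python sketch of those rules over a toy (function, args) expression model, which is an assumption for illustration only:

NON_DETERMINISTIC = {"rand", "now", "randomString"}  # a tiny illustrative subset

def validate_index_expression(expr, known_columns):
    # expr is either a column name, a literal, or a ("function", [args]) tuple.
    def walk(node):
        funcs, cols = set(), set()
        if isinstance(node, tuple):
            name, args = node
            funcs.add(name)
            for arg in args:
                sub_funcs, sub_cols = walk(arg)
                funcs |= sub_funcs
                cols |= sub_cols
        elif isinstance(node, str) and node in known_columns:
            cols.add(node)
        return funcs, cols

    funcs, cols = walk(expr)
    if "arrayJoin" in funcs:
        raise ValueError("Secondary index cannot contain array joins")
    if funcs & NON_DETERMINISTIC:
        raise ValueError("Secondary index must be deterministic")
    if not cols:
        raise ValueError("Secondary index cannot contain constants")

validate_index_expression(("multiply", ["id", 2]), {"id"})   # like `id * 2`: accepted
for bad in ["foo", ("plus", ["id", ("rand", [])])]:          # like 'foo' and `id + rand()`: rejected
    try:
        validate_index_expression(bad, {"id"})
    except ValueError as err:
        print(err)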
+ LOG_WARNING(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before), refuse to remove blobs", zookeeper_part_uniq_node, part_name); + return {false, {}}; } else { diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 08cd2d466d0..2f35b337cb3 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,9 +316,12 @@ CI_CONFIG = { "Integration tests flaky check (asan)": { "required_build": "package_asan", }, - "Compatibility check": { + "Compatibility check (amd64)": { "required_build": "package_release", }, + "Compatibility check (aarch64)": { + "required_build": "package_aarch64", + }, "Unit tests (release-clang)": { "required_build": "binary_release", }, diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 0bdcf1ba3b4..432e9ec7c01 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -2,6 +2,7 @@ from distutils.version import StrictVersion from typing import List, Tuple +import argparse import logging import os import subprocess @@ -28,9 +29,7 @@ from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" -MAX_GLIBC_VERSION = "2.4" DOWNLOAD_RETRIES_COUNT = 5 -CHECK_NAME = "Compatibility check" def process_os_check(log_path: str) -> TestResult: @@ -43,7 +42,7 @@ def process_os_check(log_path: str) -> TestResult: return TestResult(name, "OK") -def process_glibc_check(log_path: str) -> TestResults: +def process_glibc_check(log_path: str, max_glibc_version: str) -> TestResults: test_results = [] # type: TestResults with open(log_path, "r") as log: for line in log: @@ -53,7 +52,7 @@ def process_glibc_check(log_path: str) -> TestResults: _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": test_results.append(TestResult(symbol_with_glibc, "FAIL")) - elif StrictVersion(version) > MAX_GLIBC_VERSION: + elif StrictVersion(version) > max_glibc_version: test_results.append(TestResult(symbol_with_glibc, "FAIL")) if not test_results: test_results.append(TestResult("glibc check", "OK")) @@ -61,18 +60,24 @@ def process_glibc_check(log_path: str) -> TestResults: def process_result( - result_folder: str, server_log_folder: str + result_folder: str, + server_log_folder: str, + check_glibc: bool, + check_distributions: bool, + max_glibc_version: str, ) -> Tuple[str, str, TestResults, List[str]]: glibc_log_path = os.path.join(result_folder, "glibc.log") - test_results = process_glibc_check(glibc_log_path) + test_results = process_glibc_check(glibc_log_path, max_glibc_version) status = "success" description = "Compatibility check passed" - if len(test_results) > 1 or test_results[0].status != "OK": - status = "failure" - description = "glibc check failed" - if status == "success": + if check_glibc: + if len(test_results) > 1 or test_results[0].status != "OK": + status = "failure" + description = "glibc check failed" + + if status == "success" and check_distributions: for operating_system in ("ubuntu:12.04", "centos:5"): test_result = process_os_check( os.path.join(result_folder, operating_system) @@ -101,13 +106,18 @@ def process_result( return status, description, test_results, result_logs -def get_run_commands( - build_path, result_folder, server_log_folder, image_centos, image_ubuntu -): +def get_run_commands_glibc(build_path, result_folder): return [ f"readelf -s --wide {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log", f"readelf -s --wide 
{build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", f"readelf -s --wide {build_path}/usr/bin/clickhouse-library-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", + ] + + +def get_run_commands_distributions( + build_path, result_folder, server_log_folder, image_centos, image_ubuntu +): + return [ f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " f"--volume={build_path}/etc/clickhouse-server:/config " f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04", @@ -117,9 +127,21 @@ def get_run_commands( ] +def parse_args(): + parser = argparse.ArgumentParser("Check compatibility with old distributions") + parser.add_argument("--check-name", required=True) + parser.add_argument("--check-glibc", action="store_true") + parser.add_argument( + "--check-distributions", action="store_true" + ) # currently hardcoded to x86, don't enable for ARM + return parser.parse_args() + + def main(): logging.basicConfig(level=logging.INFO) + args = parse_args() + stopwatch = Stopwatch() temp_path = TEMP_PATH @@ -129,13 +151,11 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) - rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME) + rerun_helper = RerunHelper(gh, pr_info, args.check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - docker_images = get_images_with_versions(reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU]) - packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): os.makedirs(packages_path) @@ -145,7 +165,7 @@ def main(): "clickhouse-common-static_" in url or "clickhouse-server_" in url ) - download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter) + download_builds_filter(args.check_name, reports_path, packages_path, url_filter) for f in os.listdir(packages_path): if ".deb" in f: @@ -162,9 +182,24 @@ def main(): if not os.path.exists(result_path): os.makedirs(result_path) - run_commands = get_run_commands( - packages_path, result_path, server_log_path, docker_images[0], docker_images[1] - ) + run_commands = [] + + if args.check_glibc: + check_glibc_commands = get_run_commands_glibc(packages_path, result_path) + run_commands.extend(check_glibc_commands) + + if args.check_distributions: + docker_images = get_images_with_versions( + reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU] + ) + check_distributions_commands = get_run_commands_distributions( + packages_path, + result_path, + server_log_path, + docker_images[0], + docker_images[1], + ) + run_commands.extend(check_distributions_commands) state = "success" for run_command in run_commands: @@ -177,13 +212,26 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # See https://sourceware.org/glibc/wiki/Glibc%20Timeline + max_glibc_version = "" + if "amd64" in args.check_name: + max_glibc_version = "2.4" + elif "aarch64" in args.check_name: + max_glibc_version = "2.18" # because of build with newer sysroot? 
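The per-architecture glibc baseline above (2.4 for amd64, 2.18 for aarch64) feeds process_glibc_check(), which scans readelf -s output for versioned symbols. A dependency-free Python sketch of that check; the CI script itself uses distutils' StrictVersion, and the sample readelf line below is illustrative:

def glibc_version_tuple(version):
    return tuple(int(part) for part in version.split("."))

def failed_glibc_symbols(readelf_lines, max_glibc_version):
    failures = []
    for line in readelf_lines:
        if "@GLIBC_" not in line:
            continue
        symbol = line.split()[-1]            # e.g. "memcpy@GLIBC_2.14"
        _, version = symbol.split("@GLIBC_")
        if version == "PRIVATE" or glibc_version_tuple(version) > glibc_version_tuple(max_glibc_version):
            failures.append(symbol)
    return failures

sample = ["    12: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND memcpy@GLIBC_2.14"]
print(failed_glibc_symbols(sample, "2.4"))    # ['memcpy@GLIBC_2.14']: too new for the amd64 baseline
print(failed_glibc_symbols(sample, "2.18"))   # []: acceptable for the aarch64 baseline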
+ else: + raise Exception("Can't determine max glibc version") + s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( - result_path, server_log_path + result_path, + server_log_path, + args.check_glibc, + args.check_distributions, + max_glibc_version, ) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, CHECK_NAME, test_results) + mark_flaky_tests(ch_helper, args.check_name, test_results) report_url = upload_results( s3_helper, @@ -191,10 +239,10 @@ def main(): pr_info.sha, test_results, additional_logs, - CHECK_NAME, + args.check_name, ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) + post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, @@ -203,7 +251,7 @@ def main(): stopwatch.duration_seconds, stopwatch.start_time_str, report_url, - CHECK_NAME, + args.check_name, ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 34f8bea219f..6ebe994dc68 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -294,6 +294,65 @@ def test_merge_tree_custom_disk_setting(start_cluster): ).strip() ) + node1.query(f"DROP TABLE {TABLE_NAME} SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_2 SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_3 SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_4 SYNC") + node2.query(f"DROP TABLE {TABLE_NAME}_4 SYNC") + + +def test_merge_tree_nested_custom_disk_setting(start_cluster): + node = cluster.instances["node1"] + + minio = cluster.minio_client + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): + minio.remove_object(cluster.minio_bucket, obj.object_name) + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == 0 + ) + + node.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME} SYNC; + CREATE TABLE {TABLE_NAME} (a Int32) + ENGINE = MergeTree() order by tuple() + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=s3, + endpoint='http://minio1:9001/root/data/', + access_key_id='minio', + secret_access_key='minio123')); + """ + ) + + node.query(f"INSERT INTO {TABLE_NAME} SELECT number FROM numbers(100)") + node.query("SYSTEM DROP FILESYSTEM CACHE") + + # Check cache is filled + assert 0 == int(node.query("SELECT count() FROM system.filesystem_cache")) + assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}")) + node.query(f"SELECT * FROM {TABLE_NAME}") + assert 0 < int(node.query("SELECT count() FROM system.filesystem_cache")) + + # Check s3 is filled + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0 + ) + + node.restart_clickhouse() + + assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}")) + + expected = """ + SETTINGS disk = disk(type = cache, max_size = \\'[HIDDEN]\\', path = \\'[HIDDEN]\\', disk = disk(type = s3, endpoint = \\'[HIDDEN]\\' + """ + assert expected.strip() in node.query(f"SHOW CREATE TABLE {TABLE_NAME}").strip() + node.query(f"DROP TABLE {TABLE_NAME} SYNC") + def test_merge_tree_setting_override(start_cluster): node = cluster.instances["node3"] @@ -367,3 +426,4 @@ def test_merge_tree_setting_override(start_cluster): assert 
( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0 ) + node.query(f"DROP TABLE {TABLE_NAME} SYNC") diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 898bbc40eb9..2be478f95d0 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -44,8 +44,6 @@ def test_file_path_escaping(started_cluster): ] ) - -def test_file_path_escaping_atomic_db(started_cluster): node.query("CREATE DATABASE IF NOT EXISTS `test 2` ENGINE = Atomic") node.query( """ diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index a1bc0d42a46..137d585f7d1 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -594,8 +594,6 @@ def test_cancel_while_processing_input(): stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQueryWithStreamInput(send_query_info()) assert result.cancelled == True - assert result.progress.written_rows == 6 - assert query("SELECT a FROM t ORDER BY a") == "1\n2\n3\n4\n5\n6\n" def test_cancel_while_generating_output(): diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 04f6800b92b..412780c8f0f 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -679,3 +679,44 @@ def test_cmd_rqld(started_cluster): + " does not become leader after 30s, maybe there is something wrong." ) assert keeper_utils.is_leader(cluster, node) + + +def test_cmd_clrs(started_cluster): + if node1.is_built_with_sanitizer(): + return + + def get_memory_purges(): + return node1.query( + "SELECT value FROM system.events WHERE event = 'MemoryAllocatorPurge' SETTINGS system_events_show_zero_values = 1" + ) + + zk = None + try: + wait_nodes() + + zk = get_fake_zk(node1.name, timeout=30.0) + + paths = [f"/clrs_{i}" for i in range(10000)] + + # we only count the events because we cannot reliably test memory usage of Keeper + # but let's create and delete nodes so the first purge needs to release some memory + create_transaction = zk.transaction() + for path in paths: + create_transaction.create(path) + create_transaction.commit() + + delete_transaction = zk.transaction() + for path in paths: + delete_transaction.delete(path) + delete_transaction.commit() + + # repeat multiple times to make sure MemoryAllocatorPurge isn't increased because of other reasons + for _ in range(5): + prev_purges = int(get_memory_purges()) + keeper_utils.send_4lw_cmd(cluster, node1, cmd="clrs") + current_purges = int(get_memory_purges()) + assert current_purges > prev_purges + prev_purges = current_purges + + finally: + destroy_zk_client(zk) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9f617369859..51952ac1eb7 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -285,6 +285,56 @@ def avro_confluent_message(schema_registry_client, value): # Tests +def test_kafka_prohibited_column_types(kafka_cluster): + def assert_returned_exception(e): + assert e.value.returncode == 36 + assert ( + "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns." 
+ in str(e.value) + ) + + # check column with DEFAULT expression + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b Int DEFAULT 0) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check EPHEMERAL + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b Int EPHEMERAL) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check ALIAS + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b String Alias toString(a)) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check MATERIALIZED + # check ALIAS + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b String MATERIALIZED toString(a)) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + def test_kafka_settings_old_syntax(kafka_cluster): assert TSV( instance.query( diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference index b6f5fe99ca1..b5b93c34c00 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n +CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm log2(i) TYPE minmax GRANULARITY 1,\n INDEX nn log2(i) TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index 683bd271405..be0f7e8b710 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ 
b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -2,9 +2,9 @@ drop table if exists x; -create table x(i int, index mm RAND() type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; +create table x(i int, index mm LOG2(i) type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; -alter table x add index nn RAND() type minmax granularity 1, add projection p2 (select MIN(i)); +alter table x add index nn LOG2(i) type minmax granularity 1, add projection p2 (select MIN(i)); show create x; diff --git a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh index 5ed94148bc1..734cef06214 100755 --- a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh +++ b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh @@ -49,7 +49,16 @@ insert_client_opts=( timeout 250s $CLICKHOUSE_CLIENT "${client_opts[@]}" "${insert_client_opts[@]}" -q "insert into function remote('127.2', currentDatabase(), in_02232) select * from numbers(1e6)" # Kill underlying query of remote() to make KILL faster -timeout 60s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null +# This test reproduces very interesting behaviour. +# The block size is 1, so the secondary query creates InterpreterSelectQuery for each row due to pushing to the MV. +# It works extremely slowly, and the initial query produces new blocks and writes them to the socket much faster +# than the secondary query can read and process them. Therefore, it fills network buffers in the kernel. +# Once a buffer in the kernel is full, send(...) blocks until the secondary query finishes processing the data +# that it already has in ReadBufferFromPocoSocket and calls recv. +# Or until the kernel decides to resize the buffer (seems like it has non-trivial rules for that). +# Anyway, it may look like the initial query got stuck, but actually it did not. +# Moreover, the initial query cannot be killed at that point, so KILL QUERY ... SYNC will get "stuck" as well. +timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE query like '%INSERT INTO $CLICKHOUSE_DATABASE.in_02232%' SYNC" --format Null echo $?
$CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q " diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference index e228d911715..a3d8a33f757 100644 --- a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference @@ -6,7 +6,7 @@ Arrow 2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 Parquet ipv6 Nullable(FixedString(16)) -ipv4 Nullable(Int64) +ipv4 Nullable(UInt32) 2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 ORC ipv6 Nullable(String) diff --git a/tests/queries/0_stateless/02670_constant_skip_index.reference b/tests/queries/0_stateless/02670_constant_skip_index.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02670_constant_skip_index.sql b/tests/queries/0_stateless/02670_constant_skip_index.sql new file mode 100644 index 00000000000..97dd2ab33c9 --- /dev/null +++ b/tests/queries/0_stateless/02670_constant_skip_index.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t_constant_index; + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index 'foo' TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError INCORRECT_QUERY } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id + rand() TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id * 2 TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; + +DROP TABLE t_constant_index; diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.reference b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference new file mode 100644 index 00000000000..05feadb58a0 --- /dev/null +++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference @@ -0,0 +1,47 @@ +-- { echoOn } +set allow_experimental_analyzer=1; +set optimize_trivial_count_query=1; +create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple(); +select count() from m3; +0 +insert into m3 values (0,0); +insert into m3 values (-1,1); +select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +ReadFromPreparedSource (Optimized trivial count) +select count() from m3; +2 +select count(*) from m3; +2 +select count(a) from m3; +2 +select count(b) from m3; +2 +select count() + 1 from m3; +3 +drop table m3; +-- checking queries with FINAL +create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); +SYSTEM STOP MERGES replacing_m3; +select count() from replacing_m3; +0 +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (-1,1); +insert into replacing_m3 values (-2,2); +select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +ReadFromPreparedSource (Optimized trivial count) +select count() from replacing_m3; +4 +select count(*) from replacing_m3; +4 +select count(a) from replacing_m3; +4 +select count(b) from replacing_m3; +4 +select count() from replacing_m3 FINAL; +3 +select count(a) from replacing_m3 FINAL; +3 +select count(b) from replacing_m3 FINAL; +3 +drop table replacing_m3; diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.sql b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql new file 
mode 100644 index 00000000000..988d1b9ba92 --- /dev/null +++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql @@ -0,0 +1,45 @@ +drop table if exists m3; +drop table if exists replacing_m3; + +-- { echoOn } +set allow_experimental_analyzer=1; +set optimize_trivial_count_query=1; + +create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple(); + +select count() from m3; + +insert into m3 values (0,0); +insert into m3 values (-1,1); + +select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +select count() from m3; +select count(*) from m3; +select count(a) from m3; +select count(b) from m3; +select count() + 1 from m3; + +drop table m3; + +-- checking queries with FINAL +create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); +SYSTEM STOP MERGES replacing_m3; + +select count() from replacing_m3; + +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (-1,1); +insert into replacing_m3 values (-2,2); + +select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +select count() from replacing_m3; +select count(*) from replacing_m3; +select count(a) from replacing_m3; +select count(b) from replacing_m3; + +select count() from replacing_m3 FINAL; +select count(a) from replacing_m3 FINAL; +select count(b) from replacing_m3 FINAL; + +drop table replacing_m3;
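The analyzer test above exercises applyTrivialCountIfPossible(), which answers count() from table metadata only when nothing can change the observed row count: no FINAL, no GROUP BY, no lightweight deletes, no WHERE/PREWHERE, no open transaction, exactly one count() aggregate, and none of the settings that force scanning. A compact Python sketch of that gate, with attribute names on q chosen for illustration rather than taken from the query-tree API:

from types import SimpleNamespace

def can_apply_trivial_count(q):
    if not q.optimize_trivial_count_query:
        return False
    if q.has_final or q.has_group_by:                      # FINAL and GROUP BY change the answer
        return False
    if q.has_lightweight_deletes:                          # masked rows are not reflected in totalRows()
        return False
    if q.max_parallel_replicas > 1 or q.experimental_deduplication or q.empty_result_for_empty_set:
        return False
    if q.aggregates != ["count"]:                          # exactly one aggregate, and it must be count()
        return False
    # WHERE/PREWHERE (or an open transaction snapshot) would require scanning parts,
    # or at least partition predicates, rather than the metadata row count alone.
    return not (q.has_where or q.has_prewhere or q.in_transaction)

q = SimpleNamespace(optimize_trivial_count_query=True, has_final=False, has_group_by=False,
                    has_lightweight_deletes=False, max_parallel_replicas=1,
                    experimental_deduplication=False, empty_result_for_empty_set=False,
                    aggregates=["count"], has_where=False, has_prewhere=False, in_transaction=False)
assert can_apply_trivial_count(q)
q.has_final = True
assert not can_apply_trivial_count(q)   # matches the FINAL cases in the test above, which scan parts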