Merge branch 'master' into evillique-patch-2

This commit is contained in:
Nikolay Degterinsky 2023-03-08 01:03:28 +01:00 committed by GitHub
commit 1421411d9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
53 changed files with 1267 additions and 225 deletions

View File

@ -79,7 +79,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -98,12 +98,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -741,7 +772,8 @@ jobs:
- FunctionalStatefulTestDebug
- StressTestTsan
- IntegrationTestsRelease
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

View File

@ -110,7 +110,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -129,12 +129,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -3124,7 +3155,8 @@ jobs:
- PerformanceComparisonX86-1
- PerformanceComparisonX86-2
- PerformanceComparisonX86-3
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan
- ASTFuzzerTestTsan

View File

@ -174,7 +174,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -193,12 +193,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -4792,7 +4823,8 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
- IntegrationTestsFlakyCheck
- SQLancerTestRelease
- SQLancerTestDebug

View File

@ -71,7 +71,7 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
CompatibilityCheck:
CompatibilityCheckX86:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
@ -90,12 +90,43 @@ jobs:
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
- name: CompatibilityCheckX86
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
CompatibilityCheckAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheckAarch64
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
- name: Cleanup
if: always()
run: |
@ -1947,7 +1978,8 @@ jobs:
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
- CompatibilityCheck
- CompatibilityCheckX86
- CompatibilityCheckAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

View File

@ -90,6 +90,9 @@ namespace MongoDB
Poco::Net::SocketAddress address() const;
/// Returns the address of the MongoDB server.
const std::string & uri() const;
/// Returns the uri on which the connection was made.
void connect(const std::string & hostAndPort);
/// Connects to the given MongoDB server.
@ -148,6 +151,7 @@ namespace MongoDB
private:
Poco::Net::SocketAddress _address;
Poco::Net::StreamSocket _socket;
std::string _uri;
};
@ -158,6 +162,10 @@ namespace MongoDB
{
return _address;
}
inline const std::string & Connection::uri() const
{
return _uri;
}
}

View File

@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)
void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
{
Poco::URI theURI(uri);
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
std::vector<std::string> strAddresses;
std::string newURI;
std::string userInfo = theURI.getUserInfo();
std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;
if (uri.find(',') != std::string::npos)
{
size_t pos;
size_t head = 0;
if ((pos = uri.find("@")) != std::string::npos)
{
head = pos + 1;
}
else if ((pos = uri.find("://")) != std::string::npos)
{
head = pos + 3;
}
std::string databaseName = theURI.getPath();
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
if (databaseName.empty()) databaseName = "admin";
std::string tempstr;
std::string::const_iterator it = uri.begin();
it += head;
size_t tail = head;
for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
{
tempstr += *it;
tail++;
}
bool ssl = false;
Poco::Timespan connectTimeout;
Poco::Timespan socketTimeout;
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
it = tempstr.begin();
std::string token;
for (;it != tempstr.end(); ++it)
{
if (*it == ',')
{
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
token = "";
}
else
{
token += *it;
}
}
newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
strAddresses.push_back(newURI);
}
else
{
strAddresses.push_back(uri);
}
Poco::URI::QueryParameters params = theURI.getQueryParameters();
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
{
if (it->first == "ssl")
{
ssl = (it->second == "true");
}
else if (it->first == "connectTimeoutMS")
{
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "socketTimeoutMS")
{
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "authMechanism")
{
authMechanism = it->second;
}
}
newURI = strAddresses.front();
Poco::URI theURI(newURI);
if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
std::string userInfo = theURI.getUserInfo();
std::string databaseName = theURI.getPath();
if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
if (databaseName.empty()) databaseName = "admin";
if (socketTimeout > 0)
{
_socket.setSendTimeout(socketTimeout);
_socket.setReceiveTimeout(socketTimeout);
}
bool ssl = false;
Poco::Timespan connectTimeout;
Poco::Timespan socketTimeout;
std::string authMechanism = Database::AUTH_SCRAM_SHA1;
std::string readPreference="primary";
if (!userInfo.empty())
{
std::string username;
std::string password;
std::string::size_type pos = userInfo.find(':');
if (pos != std::string::npos)
{
username.assign(userInfo, 0, pos++);
password.assign(userInfo, pos, userInfo.size() - pos);
}
else username = userInfo;
Poco::URI::QueryParameters params = theURI.getQueryParameters();
for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
{
if (it->first == "ssl")
{
ssl = (it->second == "true");
}
else if (it->first == "connectTimeoutMS")
{
connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "socketTimeoutMS")
{
socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
}
else if (it->first == "authMechanism")
{
authMechanism = it->second;
}
else if (it->first == "readPreference")
{
readPreference= it->second;
}
}
Database database(databaseName);
if (!database.authenticate(*this, username, password, authMechanism))
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
}
for (std::vector<std::string>::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
{
newURI = *it;
theURI = Poco::URI(newURI);
std::string host = theURI.getHost();
Poco::UInt16 port = theURI.getPort();
if (port == 0) port = 27017;
connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
_uri = newURI;
if (socketTimeout > 0)
{
_socket.setSendTimeout(socketTimeout);
_socket.setReceiveTimeout(socketTimeout);
}
if (strAddresses.size() > 1)
{
Poco::MongoDB::QueryRequest request("admin.$cmd");
request.setNumberToReturn(1);
request.selector().add("isMaster", 1);
Poco::MongoDB::ResponseMessage response;
sendRequest(request, response);
_uri = newURI;
if (!response.documents().empty())
{
Poco::MongoDB::Document::Ptr doc = response.documents()[0];
if (doc->get<bool>("ismaster") && readPreference == "primary")
{
break;
}
else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
{
break;
}
else if (it + 1 == strAddresses.cend())
{
throw Poco::URISyntaxException(uri);
}
}
}
}
if (!userInfo.empty())
{
std::string username;
std::string password;
std::string::size_type pos = userInfo.find(':');
if (pos != std::string::npos)
{
username.assign(userInfo, 0, pos++);
password.assign(userInfo, pos, userInfo.size() - pos);
}
else username = userInfo;
Database database(databaseName);
if (!database.authenticate(*this, username, password, authMechanism))
throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
}
}

View File

@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02)
#### Performance Improvement
* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Bug Fix
* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Build/Testing/Packaging Improvement
* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)
#### Performance Improvement
* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
#### Build/Testing/Packaging Improvement
* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,47 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce)
#### Performance Improvement
* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Improvement
* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)).
* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### Build/Testing/Packaging Improvement
* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set<std::string>` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)).
* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a)
#### Improvement
* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### Bug Fix
* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)).
#### Build/Testing/Packaging Improvement
* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)).
* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro
Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded.
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.
IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings.
For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.

View File

@ -125,6 +125,10 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
:::info
Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level.
:::
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`.
One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without).

View File

@ -280,12 +280,20 @@ SELECT
## toIPv4OrDefault(string)
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0.
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).
## toIPv4OrNull(string)
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.
## toIPv6OrDefault(string)
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).
## toIPv6OrNull(string)
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
## toIPv6
Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.

View File

@ -124,6 +124,10 @@ If the data type and default expression are defined explicitly, this expression
Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable that all columns they can be calculated from have been passed.
:::info
Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level, see [Kafka Engine](../../../engines/table-engines/integrations/kafka.md#description).
:::
### DEFAULT
`DEFAULT expr`

View File

@ -128,6 +128,7 @@ if (BUILD_STANDALONE_KEEPER)
ch_contrib::lz4
ch_contrib::zstd
ch_contrib::cityhash
ch_contrib::jemalloc
common ch_contrib::double_conversion
ch_contrib::dragonbox_to_chars
pcg_random

View File

@ -358,12 +358,27 @@ ZooKeeper::ZooKeeper(
if (!args.auth_scheme.empty())
sendAuth(args.auth_scheme, args.identity);
send_thread = ThreadFromGlobalPool([this] { sendThread(); });
receive_thread = ThreadFromGlobalPool([this] { receiveThread(); });
try
{
send_thread = ThreadFromGlobalPool([this] { sendThread(); });
receive_thread = ThreadFromGlobalPool([this] { receiveThread(); });
initApiVersion();
initApiVersion();
ProfileEvents::increment(ProfileEvents::ZooKeeperInit);
ProfileEvents::increment(ProfileEvents::ZooKeeperInit);
}
catch (...)
{
tryLogCurrentException(log, "Failed to connect to ZooKeeper");
if (send_thread.joinable())
send_thread.join();
if (receive_thread.joinable())
receive_thread.join();
throw;
}
}

View File

@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco
}
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc";
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs";
KeeperConfigurationAndSettings::KeeperConfigurationAndSettings()
: server_id(NOT_EXIST)

View File

@ -148,6 +148,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat
FourLetterCommandPtr recalculate_command = std::make_shared<RecalculateCommand>(keeper_dispatcher);
factory.registerCommand(recalculate_command);
FourLetterCommandPtr clean_resources_command = std::make_shared<CleanResourcesCommand>(keeper_dispatcher);
factory.registerCommand(clean_resources_command);
factory.initializeAllowList(keeper_dispatcher);
factory.setInitialize(true);
}
@ -524,4 +527,10 @@ String RecalculateCommand::run()
return "ok";
}
String CleanResourcesCommand::run()
{
keeper_dispatcher.cleanResources();
return "ok";
}
}

View File

@ -377,7 +377,6 @@ struct RequestLeaderCommand : public IFourLetterCommand
~RequestLeaderCommand() override = default;
};
/// Request to be leader.
struct RecalculateCommand : public IFourLetterCommand
{
explicit RecalculateCommand(KeeperDispatcher & keeper_dispatcher_)
@ -390,4 +389,16 @@ struct RecalculateCommand : public IFourLetterCommand
~RecalculateCommand() override = default;
};
struct CleanResourcesCommand : public IFourLetterCommand
{
explicit CleanResourcesCommand(KeeperDispatcher & keeper_dispatcher_)
: IFourLetterCommand(keeper_dispatcher_)
{
}
String name() override { return "clrs"; }
String run() override;
~CleanResourcesCommand() override = default;
};
}

View File

@ -9,7 +9,7 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/checkStackSize.h>
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
#include <future>
#include <chrono>
@ -17,12 +17,26 @@
#include <iterator>
#include <limits>
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
#endif
namespace CurrentMetrics
{
extern const Metric KeeperAliveConnections;
extern const Metric KeeperOutstandingRequets;
}
namespace ProfileEvents
{
extern const Event MemoryAllocatorPurge;
extern const Event MemoryAllocatorPurgeTimeMicroseconds;
}
namespace fs = std::filesystem;
namespace DB
@ -753,4 +767,15 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const
return result;
}
void KeeperDispatcher::cleanResources()
{
#if USE_JEMALLOC
LOG_TRACE(&Poco::Logger::get("KeeperDispatcher"), "Purging unused memory");
Stopwatch watch;
mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds());
#endif
}
}

View File

@ -230,6 +230,8 @@ public:
{
return server->recalculateStorageStats();
}
static void cleanResources();
};
}

View File

@ -114,7 +114,11 @@ MongoDBDictionarySource::MongoDBDictionarySource(
{
if (!uri.empty())
{
Poco::URI poco_uri(uri);
// Connect with URI.
Poco::MongoDB::Connection::SocketFactory socket_factory;
connection->connect(uri, socket_factory);
Poco::URI poco_uri(connection->uri());
// Parse database from URI. This is required for correctness -- the
// cursor is created using database name and collection name, so we have
@ -134,10 +138,6 @@ MongoDBDictionarySource::MongoDBDictionarySource(
{
user.resize(separator);
}
// Connect with URI.
Poco::MongoDB::Connection::SocketFactory socket_factory;
connection->connect(uri, socket_factory);
}
else
{

View File

@ -6,9 +6,13 @@
#include <Disks/DiskSelector.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/isDiskFunction.h>
#include <Interpreters/Context.h>
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
@ -18,43 +22,85 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context)
namespace
{
/// We need a unique name for a created custom disk, but it needs to be the same
/// after table is reattached or server is restarted, so take a hash of the disk
/// configuration serialized ast as a disk name suffix.
auto disk_setting_string = serializeAST(function, true);
auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX
+ toString(sipHash128(disk_setting_string.data(), disk_setting_string.size()));
auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr {
const auto * function_args_expr = assert_cast<const ASTExpressionList *>(function.arguments.get());
const auto & function_args = function_args_expr->children;
auto config = getDiskConfigurationFromAST(disk_name, function_args, context);
auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map);
/// Mark that disk can be used without storage policy.
disk->markDiskAsCustom();
return disk;
});
if (!result_disk->isRemote())
std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context)
{
static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory";
auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, "");
/// We need a unique name for a created custom disk, but it needs to be the same
/// after table is reattached or server is restarted, so take a hash of the disk
/// configuration serialized ast as a disk name suffix.
auto disk_setting_string = serializeAST(function, true);
auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX
+ toString(sipHash128(disk_setting_string.data(), disk_setting_string.size()));
if (disk_path_expected_prefix.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Base path for custom local disks must be defined in config file by `{}`",
custom_disks_base_dir_in_config);
auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr {
const auto * function_args_expr = assert_cast<const ASTExpressionList *>(function.arguments.get());
const auto & function_args = function_args_expr->children;
auto config = getDiskConfigurationFromAST(disk_name, function_args, context);
auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map);
/// Mark that disk can be used without storage policy.
disk->markDiskAsCustom();
return disk;
});
if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Path of the custom local disk must be inside `{}` directory",
disk_path_expected_prefix);
if (!result_disk->isRemote())
{
static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory";
auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, "");
if (disk_path_expected_prefix.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Base path for custom local disks must be defined in config file by `{}`",
custom_disks_base_dir_in_config);
if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Path of the custom local disk must be inside `{}` directory",
disk_path_expected_prefix);
}
return disk_name;
}
class DiskConfigurationFlattener
{
public:
struct Data
{
ContextPtr context;
};
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data & data)
{
if (isDiskFunction(ast))
{
auto disk_name = getOrCreateDiskFromDiskAST(*ast->as<ASTFunction>(), data.context);
ast = std::make_shared<ASTLiteral>(disk_name);
}
}
};
/// Visits children first.
using FlattenDiskConfigurationVisitor = InDepthNodeVisitor<DiskConfigurationFlattener, false>;
}
std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context)
{
if (!isDiskFunction(disk_function))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function");
auto ast = disk_function->clone();
FlattenDiskConfigurationVisitor::Data data{context};
FlattenDiskConfigurationVisitor{data}.visit(ast);
auto disk_name = assert_cast<const ASTLiteral &>(*ast).value.get<String>();
LOG_TRACE(&Poco::Logger::get("getOrCreateDiskFromDiskAST"), "Result disk name: {}", disk_name);
return disk_name;
}

View File

@ -13,6 +13,6 @@ class ASTFunction;
* add it to DiskSelector by a unique (but always the same for given configuration) disk name
* and return this name.
*/
std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context);
std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context);
}

View File

@ -161,6 +161,8 @@ public:
if (curr_process.processed)
continue;
LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (synchronously)", curr_process.query_id);
auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true);
if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent)
@ -226,6 +228,8 @@ BlockIO InterpreterKillQueryQuery::execute()
MutableColumns res_columns = header.cloneEmptyColumns();
for (const auto & query_desc : queries_to_stop)
{
if (!query.test)
LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (asynchronously)", query_desc.query_id);
auto code = (query.test) ? CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true);
insertResultRow(query_desc.source_num, code, processes_block, header, res_columns);
}

View File

@ -6,6 +6,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/isDiskFunction.h>
#include <Common/assert_cast.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
@ -31,42 +32,64 @@ bool FieldFromASTImpl::isSecret() const
return isDiskFunction(ast);
}
class DiskConfigurationMasker
{
public:
struct Data {};
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data &)
{
if (isDiskFunction(ast))
{
const auto & disk_function = assert_cast<const ASTFunction &>(*ast);
const auto * disk_function_args_expr = assert_cast<const ASTExpressionList *>(disk_function.arguments.get());
const auto & disk_function_args = disk_function_args_expr->children;
auto is_secret_arg = [](const std::string & arg_name)
{
/// We allow to not hide type of the disk, e.g. disk(type = s3, ...)
/// and also nested disk, e.g. disk = 'disk_name'
return arg_name != "type" && arg_name != "disk";
};
for (const auto & arg : disk_function_args)
{
auto * setting_function = arg->as<ASTFunction>();
if (!setting_function || setting_function->name != "equals")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function");
auto * function_args_expr = assert_cast<ASTExpressionList *>(setting_function->arguments.get());
if (!function_args_expr)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments");
auto & function_args = function_args_expr->children;
if (function_args.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments");
auto * key_identifier = function_args[0]->as<ASTIdentifier>();
if (!key_identifier)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier");
const std::string & key = key_identifier->name();
if (is_secret_arg(key))
function_args[1] = std::make_shared<ASTLiteral>("[HIDDEN]");
}
}
}
};
/// Visits children first.
using HideDiskConfigurationVisitor = InDepthNodeVisitor<DiskConfigurationMasker, false>;
String FieldFromASTImpl::toString(bool show_secrets) const
{
if (!show_secrets && isDiskFunction(ast))
{
auto hidden = ast->clone();
const auto & disk_function = assert_cast<const ASTFunction &>(*hidden);
const auto * disk_function_args_expr = assert_cast<const ASTExpressionList *>(disk_function.arguments.get());
const auto & disk_function_args = disk_function_args_expr->children;
auto is_secret_arg = [](const std::string & arg_name)
{
return arg_name != "type";
};
for (const auto & arg : disk_function_args)
{
auto * setting_function = arg->as<ASTFunction>();
if (!setting_function || setting_function->name != "equals")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function");
auto * function_args_expr = assert_cast<ASTExpressionList *>(setting_function->arguments.get());
if (!function_args_expr)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments");
auto & function_args = function_args_expr->children;
if (function_args.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments");
auto * key_identifier = function_args[0]->as<ASTIdentifier>();
if (!key_identifier)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier");
const std::string & key = key_identifier->name();
if (is_secret_arg(key))
function_args[1] = std::make_shared<ASTLiteral>("[HIDDEN]");
}
HideDiskConfigurationVisitor::Data data{};
HideDiskConfigurationVisitor{data}.visit(hidden);
return serializeAST(*hidden);
}

View File

@ -1,5 +1,7 @@
#include <Planner/PlannerJoinTree.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/FunctionFactory.h>
@ -19,6 +21,8 @@
#include <Analyzer/JoinNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/AggregationUtils.h>
#include <Analyzer/FunctionNode.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/QueryPlan/SortingStep.h>
@ -27,6 +31,7 @@
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Interpreters/Context.h>
#include <Interpreters/IJoin.h>
@ -40,6 +45,10 @@
#include <Planner/PlannerActionsVisitor.h>
#include <Planner/Utils.h>
#include <AggregateFunctions/AggregateFunctionCount.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Common/scope_guard_safe.h>
namespace DB
{
@ -143,6 +152,100 @@ NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage
return result;
}
bool applyTrivialCountIfPossible(
QueryPlan & query_plan,
const TableNode & table_node,
const QueryTreeNodePtr & query_tree,
const ContextPtr & query_context,
const Names & columns_names)
{
const auto & settings = query_context->getSettingsRef();
if (!settings.optimize_trivial_count_query)
return false;
/// can't apply if FINAL
if (table_node.getTableExpressionModifiers().has_value() && table_node.getTableExpressionModifiers()->hasFinal())
return false;
auto & main_query_node = query_tree->as<QueryNode &>();
if (main_query_node.hasGroupBy())
return false;
const auto & storage = table_node.getStorage();
if (!storage || storage->hasLightweightDeletedMask())
return false;
if (settings.max_parallel_replicas > 1 || settings.allow_experimental_query_deduplication
|| settings.empty_result_for_aggregation_by_empty_set)
return false;
QueryTreeNodes aggregates = collectAggregateFunctionNodes(query_tree);
if (aggregates.size() != 1)
return false;
const auto & function_node = aggregates.front().get()->as<const FunctionNode &>();
chassert(function_node.getAggregateFunction() != nullptr);
const auto * count_func = typeid_cast<const AggregateFunctionCount *>(function_node.getAggregateFunction().get());
if (!count_func)
return false;
/// get number of rows
std::optional<UInt64> num_rows{};
/// Transaction check here is necessary because
/// MergeTree maintains total count for all parts in Active state and it simply returns that number for trivial select count() from table query.
/// But if we have current transaction, then we should return number of rows in current snapshot (that may include parts in Outdated state),
/// so we have to use totalRowsByPartitionPredicate() instead of totalRows even for trivial query
/// See https://github.com/ClickHouse/ClickHouse/pull/24258/files#r828182031
if (!main_query_node.hasPrewhere() && !main_query_node.hasWhere() && !query_context->getCurrentTransaction())
{
num_rows = storage->totalRows(settings);
}
// TODO:
// else // It's possible to optimize count() given only partition predicates
// {
// SelectQueryInfo temp_query_info;
// temp_query_info.query = query_ptr;
// temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
// temp_query_info.prepared_sets = query_analyzer->getPreparedSets();
// num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, context);
// }
if (!num_rows)
return false;
/// set aggregation state
const AggregateFunctionCount & agg_count = *count_func;
std::vector<char> state(agg_count.sizeOfData());
AggregateDataPtr place = state.data();
agg_count.create(place);
SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place));
agg_count.set(place, num_rows.value());
auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction());
column->insertFrom(place);
/// get count() argument type
DataTypes argument_types;
argument_types.reserve(columns_names.size());
{
const Block source_header = table_node.getStorageSnapshot()->getSampleBlockForColumns(columns_names);
for (const auto & column_name : columns_names)
argument_types.push_back(source_header.getByName(column_name).type);
}
Block block_with_count{
{std::move(column),
std::make_shared<DataTypeAggregateFunction>(function_node.getAggregateFunction(), argument_types, Array{}),
columns_names.front()}};
auto source = std::make_shared<SourceFromSingleChunk>(block_with_count);
auto prepared_count = std::make_unique<ReadFromPreparedSource>(Pipe(std::move(source)));
prepared_count->setStepDescription("Optimized trivial count");
query_plan.addStep(std::move(prepared_count));
return true;
}
JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression,
const SelectQueryInfo & select_query_info,
const SelectQueryOptions & select_query_options,
@ -306,32 +409,43 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
}
}
if (!select_query_options.only_analyze)
{
from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info);
storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams);
}
/// Apply trivial_count optimization if possible
bool is_trivial_count_applied = !select_query_options.only_analyze && is_single_table_expression && table_node && select_query_info.has_aggregates
&& applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), columns_names);
if (query_plan.isInitialized())
if (is_trivial_count_applied)
{
/** Specify the number of threads only if it wasn't specified in storage.
*
* But in case of remote query and prefer_localhost_replica=1 (default)
* The inner local query (that is done in the same process, without
* network interaction), it will setMaxThreads earlier and distributed
* query will not update it.
*/
if (!query_plan.getMaxThreads() || is_remote)
query_plan.setMaxThreads(max_threads_execute_query);
from_stage = QueryProcessingStage::WithMergeableState;
}
else
{
/// Create step which reads from empty source if storage has no data
auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
Pipe pipe(std::make_shared<NullSource>(source_header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource");
query_plan.addStep(std::move(read_from_pipe));
if (!select_query_options.only_analyze)
{
from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info);
storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams);
}
if (query_plan.isInitialized())
{
/** Specify the number of threads only if it wasn't specified in storage.
*
* But in case of remote query and prefer_localhost_replica=1 (default)
* The inner local query (that is done in the same process, without
* network interaction), it will setMaxThreads earlier and distributed
* query will not update it.
*/
if (!query_plan.getMaxThreads() || is_remote)
query_plan.setMaxThreads(max_threads_execute_query);
}
else
{
/// Create step which reads from empty source if storage has no data.
auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
Pipe pipe(std::make_shared<NullSource>(source_header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource");
query_plan.addStep(std::move(read_from_pipe));
}
}
}
else if (query_node || union_node)

View File

@ -187,7 +187,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk)
if (!is_pushed)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Pipeline for PushingPipelineExecutor was finished before all data was inserted");
"Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted");
}
void PushingAsyncPipelineExecutor::push(Block block)

View File

@ -1224,14 +1224,7 @@ void TCPHandler::receiveHello()
session = makeSession();
auto & client_info = session->getClientInfo();
/// Extract the last entry from comma separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = client_info.getLastForwardedFor();
if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false))
session->authenticate(user, password, Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port()));
else
session->authenticate(user, password, socket().peerAddress());
session->authenticate(user, password, getClientAddress(client_info));
}
void TCPHandler::receiveAddendum()
@ -1522,11 +1515,16 @@ void TCPHandler::receiveQuery()
/// so we should not rely on that. However, in this particular case we got client_info from other clickhouse-server, so it's ok.
if (client_info.initial_user.empty())
{
LOG_DEBUG(log, "User (no user, interserver mode)");
LOG_DEBUG(log, "User (no user, interserver mode) (client: {})", getClientAddress(client_info).toString());
}
else
{
LOG_DEBUG(log, "User (initial, interserver mode): {}", client_info.initial_user);
LOG_DEBUG(log, "User (initial, interserver mode): {} (client: {})", client_info.initial_user, getClientAddress(client_info).toString());
/// In case of inter-server mode authorization is done with the
/// initial address of the client, not the real address from which
/// the query was come, since the real address is the address of
/// the initiator server, while we are interested in client's
/// address.
session->authenticate(AlwaysAllowCredentials{client_info.initial_user}, client_info.initial_address);
}
#else
@ -2012,4 +2010,15 @@ void TCPHandler::run()
}
}
Poco::Net::SocketAddress TCPHandler::getClientAddress(const ClientInfo & client_info)
{
/// Extract the last entry from comma separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = client_info.getLastForwardedFor();
if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false))
return Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port());
else
return socket().peerAddress();
}
}

View File

@ -273,6 +273,8 @@ private:
/// This function is called from different threads.
void updateProgress(const Progress & value);
Poco::Net::SocketAddress getClientAddress(const ClientInfo & client_info);
};
}

View File

@ -383,6 +383,15 @@ NamesAndTypesList ColumnsDescription::getEphemeral() const
return ret;
}
NamesAndTypesList ColumnsDescription::getWithDefaultExpression() const
{
NamesAndTypesList ret;
for (const auto & col : columns)
if (col.default_desc.expression)
ret.emplace_back(col.name, col.type);
return ret;
}
NamesAndTypesList ColumnsDescription::getAll() const
{
NamesAndTypesList ret;

View File

@ -132,6 +132,9 @@ public:
NamesAndTypesList getInsertable() const; /// ordinary + ephemeral
NamesAndTypesList getAliases() const;
NamesAndTypesList getEphemeral() const;
// Columns with preset default expression.
// For example from `CREATE TABLE` statement
NamesAndTypesList getWithDefaultExpression() const;
NamesAndTypesList getAllPhysical() const; /// ordinary + materialized.
NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + ephemeral
/// Returns .size0/.null/...

View File

@ -94,14 +94,15 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical());
result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true);
Block block_without_columns = result.expression->getSampleBlock();
result.sample_block = result.expression->getSampleBlock();
for (size_t i = 0; i < block_without_columns.columns(); ++i)
for (auto & elem : result.sample_block)
{
const auto & column = block_without_columns.getByPosition(i);
result.column_names.emplace_back(column.name);
result.data_types.emplace_back(column.type);
result.sample_block.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name));
if (!elem.column)
elem.column = elem.type->createColumn();
result.column_names.push_back(elem.name);
result.data_types.push_back(elem.type);
}
const auto & definition_arguments = index_definition->type->arguments;

View File

@ -959,6 +959,11 @@ void registerStorageKafka(StorageFactory & factory)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1");
}
if (args.columns.getOrdinary() != args.columns.getAll() || !args.columns.getWithDefaultExpression().empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns. "
"See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration");
}
return std::make_shared<StorageKafka>(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name);
};

View File

@ -525,7 +525,6 @@ void MergeTreeData::checkProperties(
for (const auto & index : new_metadata.secondary_indices)
{
MergeTreeIndexFactory::instance().validate(index, attach);
if (indices_names.find(index.name) != indices_names.end())

View File

@ -35,6 +35,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get(
{
auto it = creators.find(index.type);
if (it == creators.end())
{
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Unknown Index type '{}'. Available index types: {}", index.type,
std::accumulate(creators.cbegin(), creators.cend(), std::string{},
@ -46,6 +47,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get(
return left + ", " + right.first;
})
);
}
return it->second(index);
}
@ -61,8 +63,31 @@ MergeTreeIndices MergeTreeIndexFactory::getMany(const std::vector<IndexDescripti
void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach) const
{
/// Do not allow constant and non-deterministic expressions.
/// Do not throw on attach for compatibility.
if (!attach)
{
if (index.expression->hasArrayJoin())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain array joins", index.name);
try
{
index.expression->assertDeterministic();
}
catch (Exception & e)
{
e.addMessage(fmt::format("for secondary index '{}'", index.name));
throw;
}
for (const auto & elem : index.sample_block)
if (elem.column && (isColumnConst(*elem.column) || elem.column->isDummy()))
throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain constants", index.name);
}
auto it = validators.find(index.type);
if (it == validators.end())
{
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Unknown Index type '{}'. Available index types: {}", index.type,
std::accumulate(
@ -77,6 +102,7 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach
return left + ", " + right.first;
})
);
}
it->second(index, attach);
}

View File

@ -64,8 +64,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte
auto ast = dynamic_cast<const FieldFromASTImpl &>(custom.getImpl()).ast;
if (ast && isDiskFunction(ast))
{
const auto & ast_function = assert_cast<const ASTFunction &>(*ast);
auto disk_name = getOrCreateDiskFromDiskAST(ast_function, context);
auto disk_name = getOrCreateDiskFromDiskAST(ast, context);
LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name);
value = disk_name;
}

View File

@ -8434,7 +8434,11 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
}
else if (error_code == Coordination::Error::ZNONODE)
{
LOG_TRACE(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before)", zookeeper_part_uniq_node, part_name);
/// We don't know what to do, because this part can be mutation part
/// with hardlinked columns. Since we don't have this information (about blobs not to remove)
/// we refuse to remove blobs.
LOG_WARNING(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before), refuse to remove blobs", zookeeper_part_uniq_node, part_name);
return {false, {}};
}
else
{

View File

@ -316,9 +316,12 @@ CI_CONFIG = {
"Integration tests flaky check (asan)": {
"required_build": "package_asan",
},
"Compatibility check": {
"Compatibility check (amd64)": {
"required_build": "package_release",
},
"Compatibility check (aarch64)": {
"required_build": "package_aarch64",
},
"Unit tests (release-clang)": {
"required_build": "binary_release",
},

View File

@ -2,6 +2,7 @@
from distutils.version import StrictVersion
from typing import List, Tuple
import argparse
import logging
import os
import subprocess
@ -28,9 +29,7 @@ from upload_result_helper import upload_results
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
IMAGE_CENTOS = "clickhouse/test-old-centos"
MAX_GLIBC_VERSION = "2.4"
DOWNLOAD_RETRIES_COUNT = 5
CHECK_NAME = "Compatibility check"
def process_os_check(log_path: str) -> TestResult:
@ -43,7 +42,7 @@ def process_os_check(log_path: str) -> TestResult:
return TestResult(name, "OK")
def process_glibc_check(log_path: str) -> TestResults:
def process_glibc_check(log_path: str, max_glibc_version: str) -> TestResults:
test_results = [] # type: TestResults
with open(log_path, "r") as log:
for line in log:
@ -53,7 +52,7 @@ def process_glibc_check(log_path: str) -> TestResults:
_, version = symbol_with_glibc.split("@GLIBC_")
if version == "PRIVATE":
test_results.append(TestResult(symbol_with_glibc, "FAIL"))
elif StrictVersion(version) > MAX_GLIBC_VERSION:
elif StrictVersion(version) > max_glibc_version:
test_results.append(TestResult(symbol_with_glibc, "FAIL"))
if not test_results:
test_results.append(TestResult("glibc check", "OK"))
@ -61,18 +60,24 @@ def process_glibc_check(log_path: str) -> TestResults:
def process_result(
result_folder: str, server_log_folder: str
result_folder: str,
server_log_folder: str,
check_glibc: bool,
check_distributions: bool,
max_glibc_version: str,
) -> Tuple[str, str, TestResults, List[str]]:
glibc_log_path = os.path.join(result_folder, "glibc.log")
test_results = process_glibc_check(glibc_log_path)
test_results = process_glibc_check(glibc_log_path, max_glibc_version)
status = "success"
description = "Compatibility check passed"
if len(test_results) > 1 or test_results[0].status != "OK":
status = "failure"
description = "glibc check failed"
if status == "success":
if check_glibc:
if len(test_results) > 1 or test_results[0].status != "OK":
status = "failure"
description = "glibc check failed"
if status == "success" and check_distributions:
for operating_system in ("ubuntu:12.04", "centos:5"):
test_result = process_os_check(
os.path.join(result_folder, operating_system)
@ -101,13 +106,18 @@ def process_result(
return status, description, test_results, result_logs
def get_run_commands(
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
):
def get_run_commands_glibc(build_path, result_folder):
return [
f"readelf -s --wide {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
f"readelf -s --wide {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
f"readelf -s --wide {build_path}/usr/bin/clickhouse-library-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
]
def get_run_commands_distributions(
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
):
return [
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
@ -117,9 +127,21 @@ def get_run_commands(
]
def parse_args():
parser = argparse.ArgumentParser("Check compatibility with old distributions")
parser.add_argument("--check-name", required=True)
parser.add_argument("--check-glibc", action="store_true")
parser.add_argument(
"--check-distributions", action="store_true"
) # currently hardcoded to x86, don't enable for ARM
return parser.parse_args()
def main():
logging.basicConfig(level=logging.INFO)
args = parse_args()
stopwatch = Stopwatch()
temp_path = TEMP_PATH
@ -129,13 +151,11 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME)
rerun_helper = RerunHelper(gh, pr_info, args.check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_images = get_images_with_versions(reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU])
packages_path = os.path.join(temp_path, "packages")
if not os.path.exists(packages_path):
os.makedirs(packages_path)
@ -145,7 +165,7 @@ def main():
"clickhouse-common-static_" in url or "clickhouse-server_" in url
)
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
download_builds_filter(args.check_name, reports_path, packages_path, url_filter)
for f in os.listdir(packages_path):
if ".deb" in f:
@ -162,9 +182,24 @@ def main():
if not os.path.exists(result_path):
os.makedirs(result_path)
run_commands = get_run_commands(
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
)
run_commands = []
if args.check_glibc:
check_glibc_commands = get_run_commands_glibc(packages_path, result_path)
run_commands.extend(check_glibc_commands)
if args.check_distributions:
docker_images = get_images_with_versions(
reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU]
)
check_distributions_commands = get_run_commands_distributions(
packages_path,
result_path,
server_log_path,
docker_images[0],
docker_images[1],
)
run_commands.extend(check_distributions_commands)
state = "success"
for run_command in run_commands:
@ -177,13 +212,26 @@ def main():
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
# See https://sourceware.org/glibc/wiki/Glibc%20Timeline
max_glibc_version = ""
if "amd64" in args.check_name:
max_glibc_version = "2.4"
elif "aarch64" in args.check_name:
max_glibc_version = "2.18" # because of build with newer sysroot?
else:
raise Exception("Can't determine max glibc version")
s3_helper = S3Helper()
state, description, test_results, additional_logs = process_result(
result_path, server_log_path
result_path,
server_log_path,
args.check_glibc,
args.check_distributions,
max_glibc_version,
)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
mark_flaky_tests(ch_helper, args.check_name, test_results)
report_url = upload_results(
s3_helper,
@ -191,10 +239,10 @@ def main():
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
args.check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
@ -203,7 +251,7 @@ def main():
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
args.check_name,
)
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

View File

@ -294,6 +294,65 @@ def test_merge_tree_custom_disk_setting(start_cluster):
).strip()
)
node1.query(f"DROP TABLE {TABLE_NAME} SYNC")
node1.query(f"DROP TABLE {TABLE_NAME}_2 SYNC")
node1.query(f"DROP TABLE {TABLE_NAME}_3 SYNC")
node1.query(f"DROP TABLE {TABLE_NAME}_4 SYNC")
node2.query(f"DROP TABLE {TABLE_NAME}_4 SYNC")
def test_merge_tree_nested_custom_disk_setting(start_cluster):
node = cluster.instances["node1"]
minio = cluster.minio_client
for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)):
minio.remove_object(cluster.minio_bucket, obj.object_name)
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
== 0
)
node.query(
f"""
DROP TABLE IF EXISTS {TABLE_NAME} SYNC;
CREATE TABLE {TABLE_NAME} (a Int32)
ENGINE = MergeTree() order by tuple()
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=s3,
endpoint='http://minio1:9001/root/data/',
access_key_id='minio',
secret_access_key='minio123'));
"""
)
node.query(f"INSERT INTO {TABLE_NAME} SELECT number FROM numbers(100)")
node.query("SYSTEM DROP FILESYSTEM CACHE")
# Check cache is filled
assert 0 == int(node.query("SELECT count() FROM system.filesystem_cache"))
assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}"))
node.query(f"SELECT * FROM {TABLE_NAME}")
assert 0 < int(node.query("SELECT count() FROM system.filesystem_cache"))
# Check s3 is filled
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0
)
node.restart_clickhouse()
assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}"))
expected = """
SETTINGS disk = disk(type = cache, max_size = \\'[HIDDEN]\\', path = \\'[HIDDEN]\\', disk = disk(type = s3, endpoint = \\'[HIDDEN]\\'
"""
assert expected.strip() in node.query(f"SHOW CREATE TABLE {TABLE_NAME}").strip()
node.query(f"DROP TABLE {TABLE_NAME} SYNC")
def test_merge_tree_setting_override(start_cluster):
node = cluster.instances["node3"]
@ -367,3 +426,4 @@ def test_merge_tree_setting_override(start_cluster):
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0
)
node.query(f"DROP TABLE {TABLE_NAME} SYNC")

View File

@ -44,8 +44,6 @@ def test_file_path_escaping(started_cluster):
]
)
def test_file_path_escaping_atomic_db(started_cluster):
node.query("CREATE DATABASE IF NOT EXISTS `test 2` ENGINE = Atomic")
node.query(
"""

View File

@ -594,8 +594,6 @@ def test_cancel_while_processing_input():
stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel)
result = stub.ExecuteQueryWithStreamInput(send_query_info())
assert result.cancelled == True
assert result.progress.written_rows == 6
assert query("SELECT a FROM t ORDER BY a") == "1\n2\n3\n4\n5\n6\n"
def test_cancel_while_generating_output():

View File

@ -679,3 +679,44 @@ def test_cmd_rqld(started_cluster):
+ " does not become leader after 30s, maybe there is something wrong."
)
assert keeper_utils.is_leader(cluster, node)
def test_cmd_clrs(started_cluster):
if node1.is_built_with_sanitizer():
return
def get_memory_purges():
return node1.query(
"SELECT value FROM system.events WHERE event = 'MemoryAllocatorPurge' SETTINGS system_events_show_zero_values = 1"
)
zk = None
try:
wait_nodes()
zk = get_fake_zk(node1.name, timeout=30.0)
paths = [f"/clrs_{i}" for i in range(10000)]
# we only count the events because we cannot reliably test memory usage of Keeper
# but let's create and delete nodes so the first purge needs to release some memory
create_transaction = zk.transaction()
for path in paths:
create_transaction.create(path)
create_transaction.commit()
delete_transaction = zk.transaction()
for path in paths:
delete_transaction.delete(path)
delete_transaction.commit()
# repeat multiple times to make sure MemoryAllocatorPurge isn't increased because of other reasons
for _ in range(5):
prev_purges = int(get_memory_purges())
keeper_utils.send_4lw_cmd(cluster, node1, cmd="clrs")
current_purges = int(get_memory_purges())
assert current_purges > prev_purges
prev_purges = current_purges
finally:
destroy_zk_client(zk)

View File

@ -285,6 +285,56 @@ def avro_confluent_message(schema_registry_client, value):
# Tests
def test_kafka_prohibited_column_types(kafka_cluster):
def assert_returned_exception(e):
assert e.value.returncode == 36
assert (
"KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns."
in str(e.value)
)
# check column with DEFAULT expression
with pytest.raises(QueryRuntimeException) as exception:
instance.query(
"""
CREATE TABLE test.kafka (a Int, b Int DEFAULT 0)
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n')
"""
)
assert_returned_exception(exception)
# check EPHEMERAL
with pytest.raises(QueryRuntimeException) as exception:
instance.query(
"""
CREATE TABLE test.kafka (a Int, b Int EPHEMERAL)
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n')
"""
)
assert_returned_exception(exception)
# check ALIAS
with pytest.raises(QueryRuntimeException) as exception:
instance.query(
"""
CREATE TABLE test.kafka (a Int, b String Alias toString(a))
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n')
"""
)
assert_returned_exception(exception)
# check MATERIALIZED
# check ALIAS
with pytest.raises(QueryRuntimeException) as exception:
instance.query(
"""
CREATE TABLE test.kafka (a Int, b String MATERIALIZED toString(a))
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n')
"""
)
assert_returned_exception(exception)
def test_kafka_settings_old_syntax(kafka_cluster):
assert TSV(
instance.query(

View File

@ -1,2 +1,2 @@
CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192
metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n
CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm log2(i) TYPE minmax GRANULARITY 1,\n INDEX nn log2(i) TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192
metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n

View File

@ -2,9 +2,9 @@
drop table if exists x;
create table x(i int, index mm RAND() type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i;
create table x(i int, index mm LOG2(i) type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i;
alter table x add index nn RAND() type minmax granularity 1, add projection p2 (select MIN(i));
alter table x add index nn LOG2(i) type minmax granularity 1, add projection p2 (select MIN(i));
show create x;

View File

@ -49,7 +49,16 @@ insert_client_opts=(
timeout 250s $CLICKHOUSE_CLIENT "${client_opts[@]}" "${insert_client_opts[@]}" -q "insert into function remote('127.2', currentDatabase(), in_02232) select * from numbers(1e6)"
# Kill underlying query of remote() to make KILL faster
timeout 60s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null
# This test is reproducing very interesting bahaviour.
# The block size is 1, so the secondary query creates InterpreterSelectQuery for each row due to pushing to the MV.
# It works extremely slow, and the initial query produces new blocks and writes them to the socket much faster
# then the secondary query can read and process them. Therefore, it fills network buffers in the kernel.
# Once a buffer in the kernel is full, send(...) blocks until the secondary query will finish processing data
# that it already has in ReadBufferFromPocoSocket and call recv.
# Or until the kernel will decide to resize the buffer (seems like it has non-trivial rules for that).
# Anyway, it may look like the initial query got stuck, but actually it did not.
# Moreover, the initial query cannot be killed at that point, so KILL QUERY ... SYNC will get "stuck" as well.
timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE query like '%INSERT INTO $CLICKHOUSE_DATABASE.in_02232%' SYNC" --format Null
echo $?
$CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q "

View File

@ -6,7 +6,7 @@ Arrow
2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1
Parquet
ipv6 Nullable(FixedString(16))
ipv4 Nullable(Int64)
ipv4 Nullable(UInt32)
2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1
ORC
ipv6 Nullable(String)

View File

@ -0,0 +1,25 @@
DROP TABLE IF EXISTS t_constant_index;
CREATE TABLE t_constant_index
(
id UInt64,
INDEX t_constant_index 'foo' TYPE set(2) GRANULARITY 1
) ENGINE = MergeTree
ORDER BY id; -- { serverError INCORRECT_QUERY }
CREATE TABLE t_constant_index
(
id UInt64,
INDEX t_constant_index id + rand() TYPE set(2) GRANULARITY 1
) ENGINE = MergeTree
ORDER BY id; -- { serverError BAD_ARGUMENTS }
CREATE TABLE t_constant_index
(
id UInt64,
INDEX t_constant_index id * 2 TYPE set(2) GRANULARITY 1
) ENGINE = MergeTree
ORDER BY id;
DROP TABLE t_constant_index;

View File

@ -0,0 +1,47 @@
-- { echoOn }
set allow_experimental_analyzer=1;
set optimize_trivial_count_query=1;
create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple();
select count() from m3;
0
insert into m3 values (0,0);
insert into m3 values (-1,1);
select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
ReadFromPreparedSource (Optimized trivial count)
select count() from m3;
2
select count(*) from m3;
2
select count(a) from m3;
2
select count(b) from m3;
2
select count() + 1 from m3;
3
drop table m3;
-- checking queries with FINAL
create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b);
SYSTEM STOP MERGES replacing_m3;
select count() from replacing_m3;
0
insert into replacing_m3 values (0,0);
insert into replacing_m3 values (0,0);
insert into replacing_m3 values (-1,1);
insert into replacing_m3 values (-2,2);
select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
ReadFromPreparedSource (Optimized trivial count)
select count() from replacing_m3;
4
select count(*) from replacing_m3;
4
select count(a) from replacing_m3;
4
select count(b) from replacing_m3;
4
select count() from replacing_m3 FINAL;
3
select count(a) from replacing_m3 FINAL;
3
select count(b) from replacing_m3 FINAL;
3
drop table replacing_m3;

View File

@ -0,0 +1,45 @@
drop table if exists m3;
drop table if exists replacing_m3;
-- { echoOn }
set allow_experimental_analyzer=1;
set optimize_trivial_count_query=1;
create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple();
select count() from m3;
insert into m3 values (0,0);
insert into m3 values (-1,1);
select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
select count() from m3;
select count(*) from m3;
select count(a) from m3;
select count(b) from m3;
select count() + 1 from m3;
drop table m3;
-- checking queries with FINAL
create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b);
SYSTEM STOP MERGES replacing_m3;
select count() from replacing_m3;
insert into replacing_m3 values (0,0);
insert into replacing_m3 values (0,0);
insert into replacing_m3 values (-1,1);
insert into replacing_m3 values (-2,2);
select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
select count() from replacing_m3;
select count(*) from replacing_m3;
select count(a) from replacing_m3;
select count(b) from replacing_m3;
select count() from replacing_m3 FINAL;
select count(a) from replacing_m3 FINAL;
select count(b) from replacing_m3 FINAL;
drop table replacing_m3;