Merge branch 'master' into delay-loading-of-named-collections

This commit is contained in:
Kseniia Sumarokova 2023-02-21 00:24:18 +01:00 committed by GitHub
commit 24d0490ef3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
370 changed files with 8252 additions and 3465 deletions

View File

@ -549,7 +549,7 @@ jobs:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs

View File

@ -983,7 +983,7 @@ jobs:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs

View File

@ -1021,7 +1021,7 @@ jobs:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs

View File

@ -641,7 +641,7 @@ jobs:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
needs: [BuilderDebAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs

View File

@ -12,21 +12,22 @@
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
{
/**
* Assume the source value is 0xAABBCCDD (i.e. sizeof(from) == 4).
* Its BE representation is 0xAABBCCDD, the LE representation is 0xDDCCBBAA.
* Further assume, sizeof(res) == 8 and that res is initially zeroed out.
* With LE, the result after bit_cast will be 0xDDCCBBAA00000000 --> input value == output value.
* With BE, the result after bit_cast will be 0x00000000AABBCCDD --> input value == output value.
*/
/** Assume the source value is 0xAABBCCDD (i.e. sizeof(from) == 4).
* Its BE representation is 0xAABBCCDD, the LE representation is 0xDDCCBBAA.
* Further assume, sizeof(res) == 8 and that res is initially zeroed out.
* With LE, the result after bit_cast will be 0xDDCCBBAA00000000 --> input value == output value.
* With BE, the result after bit_cast will be 0x00000000AABBCCDD --> input value == output value.
*/
To res {};
if constexpr (std::endian::native == std::endian::little)
memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
{
memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
}
else
{
uint32_t offset_to = (sizeof(res) > sizeof(from)) ? (sizeof(res) - sizeof(from)) : 0;
uint32_t offset_from = (sizeof(from) > sizeof(res)) ? (sizeof(from) - sizeof(res)) : 0;
memcpy(reinterpret_cast<char *>(&res) + offset_to, reinterpret_cast<const char *>(&from) + offset_from, std::min(sizeof(res), sizeof(from)));
uint32_t offset_to = (sizeof(res) > sizeof(from)) ? (sizeof(res) - sizeof(from)) : 0;
uint32_t offset_from = (sizeof(from) > sizeof(res)) ? (sizeof(from) - sizeof(res)) : 0;
memcpy(reinterpret_cast<char *>(&res) + offset_to, reinterpret_cast<const char *>(&from) + offset_from, std::min(sizeof(res), sizeof(from)));
}
return res;
}

View File

@ -13,11 +13,7 @@ using char8_t = unsigned char;
#endif
/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
#if !defined(PVS_STUDIO) /// But PVS-Studio does not treat it correctly.
using UInt8 = char8_t;
#else
using UInt8 = uint8_t;
#endif
using UInt16 = uint16_t;
using UInt32 = uint32_t;

View File

@ -78,9 +78,6 @@
*
*/
// Disable warnings by PVS-Studio
//-V::GA
static const double
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */

View File

@ -85,9 +85,6 @@
*
*/
// Disable warnings by PVS-Studio
//-V::GA
#include <stdint.h>
#include <math.h>
#include "libm.h"

View File

@ -155,7 +155,7 @@ static inline long double fp_barrierl(long double x)
static inline void fp_force_evalf(float x)
{
volatile float y;
y = x; //-V1001
y = x;
}
#endif
@ -164,7 +164,7 @@ static inline void fp_force_evalf(float x)
static inline void fp_force_eval(double x)
{
volatile double y;
y = x; //-V1001
y = x;
}
#endif
@ -173,7 +173,7 @@ static inline void fp_force_eval(double x)
static inline void fp_force_evall(long double x)
{
volatile long double y;
y = x; //-V1001
y = x;
}
#endif

View File

@ -3,9 +3,6 @@
* SPDX-License-Identifier: MIT
*/
// Disable warnings by PVS-Studio
//-V::GA
#include <math.h>
#include <stdint.h>
#include "libm.h"

View File

@ -455,7 +455,7 @@ auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
typedef typename RngType::result_type rtype;
rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
% upper_bound;
for (;;) { //-V1044
for (;;) {
rtype r = rng() - RngType::min();
if (r >= threshold)
return r % upper_bound;

View File

@ -930,7 +930,7 @@ struct rxs_m_xs_mixin {
constexpr bitcount_t shift = bits - xtypebits;
constexpr bitcount_t mask = (1 << opbits) - 1;
bitcount_t rshift =
opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; //-V547
opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
internal ^= internal >> (opbits + rshift);
internal *= mcg_multiplier<itype>::multiplier();
xtype result = internal >> shift;
@ -952,7 +952,7 @@ struct rxs_m_xs_mixin {
internal *= mcg_unmultiplier<itype>::unmultiplier();
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; //-V547
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
internal = unxorshift(internal, bits, opbits + rshift);
return internal;
@ -977,7 +977,7 @@ struct rxs_m_mixin {
: 2;
constexpr bitcount_t shift = bits - xtypebits;
constexpr bitcount_t mask = (1 << opbits) - 1;
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; //-V547
bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
internal ^= internal >> (opbits + rshift);
internal *= mcg_multiplier<itype>::multiplier();
xtype result = internal >> shift;
@ -1368,7 +1368,7 @@ void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::selfinit()
// - any strange correlations would only be apparent if we
// were to backstep the generator so that the base generator
// was generating the same values again
result_type xdiff = baseclass::operator()() - baseclass::operator()(); //-V501
result_type xdiff = baseclass::operator()() - baseclass::operator()();
for (size_t i = 0; i < table_size; ++i) {
data_[i] = baseclass::operator()() ^ xdiff;
}

View File

@ -113,7 +113,7 @@ public:
}
std::size_t diff = curSize - _size;
Iterator it = --_keys.end(); //--keys can never be invoked on an empty list due to the minSize==1 requirement of LRU
Iterator it = --_keys.end(); /// --keys can never be invoked on an empty list due to the minSize==1 requirement of LRU
std::size_t i = 0;
while (i++ < diff)

View File

@ -538,7 +538,7 @@ unset IFS
numactl --show
numactl --cpunodebind=all --membind=all numactl --show
# Use less jobs to avoid OOM. Some queries can consume 8+ GB of memory.
jobs_count=$(($(grep -c ^processor /proc/cpuinfo) / 3))
jobs_count=$(($(grep -c ^processor /proc/cpuinfo) / 4))
numactl --cpunodebind=all --membind=all parallel --jobs $jobs_count --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "

View File

@ -43,11 +43,6 @@ Tries to build the ClickHouse documentation website. It can fail if you changed
something in the documentation. Most probable reason is that some cross-link in
the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the building log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)
## Description Check
@ -72,10 +67,6 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).
## Fast Test
Normally this is the first check that is run for a PR. It builds ClickHouse and
@ -84,8 +75,6 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
### Report Details
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -113,9 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html).
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`).
- **Compiler**: `clang-15`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

View File

@ -8,7 +8,7 @@ sidebar_position: 60
Creates a ClickHouse database with tables from PostgreSQL database. Firstly, database with engine `MaterializedPostgreSQL` creates a snapshot of PostgreSQL database and loads required tables. Required tables can include any subset of tables from any subset of schemas from specified database. Along with the snapshot database engine acquires LSN and once initial dump of tables is performed - it starts pulling updates from WAL. After database is created, newly added tables to PostgreSQL database are not automatically added to replication. They have to be added manually with `ATTACH TABLE db.table` query.
Replication is implemented with PostgreSQL Logical Replication Protocol, which does not allow to replicate DDL, but allows to know whether replication breaking changes happened (column type changes, adding/removing columns). Such changes are detected and according tables stop receiving updates. Such tables can be automatically reloaded in the background in case required setting is turned on (can be used starting from 22.1). Safest way for now is to use `ATTACH`/ `DETACH` queries to reload table completely. If DDL does not break replication (for example, renaming a column) table will still receive updates (insertion is done by position).
Replication is implemented with PostgreSQL Logical Replication Protocol, which does not allow to replicate DDL, but allows to know whether replication breaking changes happened (column type changes, adding/removing columns). Such changes are detected and according tables stop receiving updates. In this case you should use `ATTACH`/ `DETACH` queries to reload table completely. If DDL does not break replication (for example, renaming a column) table will still receive updates (insertion is done by position).
:::note
This database engine is experimental. To use it, set `allow_experimental_database_materialized_postgresql` to 1 in your configuration files or by using the `SET` command:
@ -165,19 +165,6 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm
Default value: empty list. (Default schema is used)
### `materialized_postgresql_allow_automatic_update` {#materialized-postgresql-allow-automatic-update}
Do not use this setting before 22.1 version.
Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
Possible values:
- 0 — The table is not automatically updated in the background, when schema changes are detected.
- 1 — The table is automatically updated in the background, when schema changes are detected.
Default value: `0`.
### `materialized_postgresql_max_block_size` {#materialized-postgresql-max-block-size}
Sets the number of rows collected in memory before flushing data into PostgreSQL database table.

View File

@ -7,7 +7,7 @@ sidebar_label: Custom Partitioning Key
# Custom Partitioning Key
:::warning
In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression).
In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months.
You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression.
:::
@ -133,3 +133,48 @@ The `detached` directory contains parts that were detached from the table using
Note that on the operating server, you cannot manually change the set of parts or their data on the file system, since the server will not know about it. For non-replicated tables, you can do this when the server is stopped, but it isn't recommended. For replicated tables, the set of parts cannot be changed in any case.
ClickHouse allows you to perform operations with the partitions: delete them, copy from one table to another, or create a backup. See the list of all operations in the section [Manipulations With Partitions and Parts](../../../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions).
## Group By optimisation using partition key
For some combinations of table's partition key and query's group by key it might be possible to execute aggregation for each partition independently.
Then we'll not have to merge partially aggregated data from all execution threads at the end,
because we are provided with the guarantee that each group by key value cannot appear in working sets of two different threads.
The typical example is:
``` sql
CREATE TABLE session_log
(
UserID UInt64,
SessionID UUID
)
ENGINE = MergeTree
PARTITION BY sipHash64(UserID) % 16
ORDER BY tuple();
SELECT
UserID,
COUNT()
FROM session_log
GROUP BY UserID;
```
:::warning
Performance of such a query heavily depends on the table layout. Because of that the optimisation is not enabled by default.
:::
The key factors for a good performance:
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
- partitions should be comparable in size, so all threads will do roughly the same amount of work
:::info
It's recommended to apply some hash function to columns in `partition by` clause in order to distribute data evenly between partitions.
:::
Relevant settings are:
- `allow_aggregate_partitions_independently` - controls if the use of optimisation is enabled
- `force_aggregate_partitions_independently` - forces its use when it's applicable from the correctness standpoint but would otherwise be disabled by the internal logic that estimates its expediency
- `max_number_of_partitions_for_independent_aggregation` - hard limit on the maximal number of partitions table could have

View File

@ -130,10 +130,57 @@ then this nested table is interpreted as a mapping of `key => (values...)`, and
Examples:
``` text
[(1, 100)] + [(2, 150)] -> [(1, 100), (2, 150)]
[(1, 100)] + [(1, 150)] -> [(1, 250)]
[(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)]
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
DROP TABLE IF EXISTS nested_sum;
CREATE TABLE nested_sum
(
date Date,
site UInt32,
hitsMap Nested(
browser String,
imps UInt32,
clicks UInt32
)
) ENGINE = SummingMergeTree
PRIMARY KEY (date, site);
INSERT INTO nested_sum VALUES ('2020-01-01', 12, ['Firefox', 'Opera'], [10, 5], [2, 1]);
INSERT INTO nested_sum VALUES ('2020-01-01', 12, ['Chrome', 'Firefox'], [20, 1], [1, 1]);
INSERT INTO nested_sum VALUES ('2020-01-01', 12, ['IE'], [22], [0]);
INSERT INTO nested_sum VALUES ('2020-01-01', 10, ['Chrome'], [4], [3]);
OPTIMIZE TABLE nested_sum FINAL; -- emulate merge
SELECT * FROM nested_sum;
┌───────date─┬─site─┬─hitsMap.browser───────────────────┬─hitsMap.imps─┬─hitsMap.clicks─┐
│ 2020-01-01 │ 10 │ ['Chrome'] │ [4] │ [3] │
│ 2020-01-01 │ 12 │ ['Chrome','Firefox','IE','Opera'] │ [20,11,22,5] │ [1,3,0,1] │
└────────────┴──────┴───────────────────────────────────┴──────────────┴────────────────┘
SELECT
site,
browser,
impressions,
clicks
FROM
(
SELECT
site,
sumMap(hitsMap.browser, hitsMap.imps, hitsMap.clicks) AS imps_map
FROM nested_sum
GROUP BY site
)
ARRAY JOIN
imps_map.1 AS browser,
imps_map.2 AS impressions,
imps_map.3 AS clicks;
┌─site─┬─browser─┬─impressions─┬─clicks─┐
│ 12 │ Chrome │ 20 │ 1 │
│ 12 │ Firefox │ 11 │ 3 │
│ 12 │ IE │ 22 │ 0 │
│ 12 │ Opera │ 5 │ 1 │
│ 10 │ Chrome │ 4 │ 3 │
└──────┴─────────┴─────────────┴────────┘
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.

View File

@ -5,27 +5,62 @@ sidebar_label: WikiStat
# WikiStat
See http://dumps.wikimedia.org/other/pagecounts-raw/ for details.
The dataset contains 0.5 trillion records.
See the video from FOSDEM 2023: https://www.youtube.com/watch?v=JlcI2Vfz_uk
And the presentation: https://presentations.clickhouse.com/fosdem2023/
Data source: https://dumps.wikimedia.org/other/pageviews/
Getting the list of links:
```
for i in {2015..2023}; do
for j in {01..12}; do
echo "${i}-${j}" >&2
curl -sSL "https://dumps.wikimedia.org/other/pageviews/$i/$i-$j/" \
| grep -oE 'pageviews-[0-9]+-[0-9]+\.gz'
done
done | sort | uniq | tee links.txt
```
Downloading the data:
```
sed -r 's!pageviews-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz!https://dumps.wikimedia.org/other/pageviews/\1/\1-\2/\0!' \
links.txt | xargs -P3 wget --continue
```
(it will take about 3 days)
Creating a table:
``` sql
CREATE TABLE wikistat
(
date Date,
time DateTime,
project String,
subproject String,
path String,
hits UInt64,
size UInt64
) ENGINE = MergeTree(date, (path, time), 8192);
time DateTime CODEC(Delta, ZSTD(3)),
project LowCardinality(String),
subproject LowCardinality(String),
path String CODEC(ZSTD(3)),
hits UInt64 CODEC(ZSTD(3))
)
ENGINE = MergeTree
ORDER BY (path, time);
```
Loading data:
Loading the data:
``` bash
$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt
$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done
$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done
```
clickhouse-local --query "
WITH replaceRegexpOne(_path, '^.+pageviews-(\\d{4})(\\d{2})(\\d{2})-(\\d{2})(\\d{2})(\\d{2}).gz$', '\1-\2-\3 \4-\5-\6')::DateTime AS time,
extractGroups(line, '^([^ \\.]+)(\\.[^ ]+)? +([^ ]+) +(\\d+) +(\\d+)$') AS values
SELECT
time,
values[1] AS project,
values[2] AS subproject,
values[3] AS path,
(values[4])::UInt64 AS hits,
(values[5])::UInt64 AS size
FROM file('pageviews*.gz', LineAsString)
WHERE length(values) = 5 FORMAT Native
" | clickhouse-client --query "INSERT INTO wikistat FORMAT Native"
```

View File

@ -164,17 +164,48 @@ If you need to install specific version of ClickHouse you have to install all pa
It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions.
#### Setup the RPM repository
First, you need to add the official repository:
``` bash
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
sudo yum install -y clickhouse-server clickhouse-client
```
sudo /etc/init.d/clickhouse-server start
#### Install ClickHouse server and client
```bash
sudo yum install -y clickhouse-server clickhouse-client
```
#### Start ClickHouse server
```bash
sudo systemctl enable clickhouse-server
sudo systemctl start clickhouse-server
sudo systemctl status clickhouse-server
clickhouse-client # or "clickhouse-client --password" if you set up a password.
```
#### Install standalone ClickHouse Keeper
:::tip
If you are going to run ClickHouse Keeper on the same server as ClickHouse server you
do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers.
:::
```bash
sudo yum install -y clickhouse-keeper
```
#### Enable and start ClickHouse Keeper
```bash
sudo systemctl enable clickhouse-keeper
sudo systemctl start clickhouse-keeper
sudo systemctl status clickhouse-keeper
```
<details markdown="1">
<summary>Deprecated Method for installing rpm-packages</summary>
@ -219,7 +250,7 @@ case $(uname -m) in
*) echo "Unknown architecture $(uname -m)"; exit 1 ;;
esac
for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client
for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client clickhouse-keeper
do
curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \
|| curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"

View File

@ -16,12 +16,12 @@ Otherwise, values outside the interval are ignored.
**Syntax**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
sparkbar(buckets[, min_x, max_x])(x, y)
```
**Parameters**
- `width` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — The interval start. Optional parameter.
- `max_x` — The interval end. Optional parameter.

View File

@ -863,3 +863,41 @@ Result:
│ 2 │
└───────────────────────────────┘
```
## regexpExtract(haystack, pattern[, index])
Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index.
**Syntax**
``` sql
regexpExtract(haystack, pattern[, index])
```
Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
**Arguments**
- `haystack` — String, in which regexp pattern will be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `index` — An integer number greater than or equal to 0, with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
**Returned values**
`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression.
Type: `String`.
**Examples**
``` sql
SELECT
regexpExtract('100-200', '(\\d+)-(\\d+)', 1),
regexpExtract('100-200', '(\\d+)-(\\d+)', 2),
regexpExtract('100-200', '(\\d+)-(\\d+)', 0),
regexpExtract('100-200', '(\\d+)-(\\d+)')
┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐
│ 100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```

View File

@ -31,9 +31,17 @@ SETTINGS
[kafka_row_delimiter = 'delimiter_symbol',]
[kafka_schema = '',]
[kafka_num_consumers = N,]
[kafka_max_block_size = 0,]
[kafka_skip_broken_messages = N]
[kafka_commit_every_batch = 0,]
[kafka_thread_per_consumer = 0]
[kafka_client_id = '',]
[kafka_poll_timeout_ms = 0,]
[kafka_poll_max_batch_size = 0,]
[kafka_flush_interval_ms = 0,]
[kafka_thread_per_consumer = 0,]
[kafka_handle_error_mode = 'default',]
[kafka_commit_on_select = false,]
[kafka_max_rows_per_message = 1];
```
Обязательные параметры:
@ -51,7 +59,14 @@ SETTINGS
- `kafka_max_block_size` — максимальный размер пачек (в сообщениях) для poll (по умолчанию `max_block_size`).
- `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию 0.
- `kafka_commit_every_batch` — включает или отключает режим записи каждой принятой и обработанной пачки по отдельности вместо единой записи целого блока (по умолчанию `0`).
- `kafka_client_id` — идентификатор клиента. Значение по умолчанию пусто ''.
- `kafka_poll_timeout_ms` - Таймаут для poll. По умолчанию: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms)
- `kafka_poll_max_batch_size` - Максимальное количество сообщений в одном poll Kafka. По умолчанию: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size)
- `kafka_flush_interval_ms` - Таймаут для сброса данных из Kafka. По умолчанию: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms)
- `kafka_thread_per_consumer` — включает или отключает предоставление отдельного потока каждому потребителю (по умолчанию `0`). При включенном режиме каждый потребитель сбрасывает данные независимо и параллельно, при отключённом — строки с данными от нескольких потребителей собираются в один блок.
- `kafka_handle_error_mode` - Способ обработки ошибок для Kafka. Возможные значения: default, stream.
- `kafka_commit_on_select` - Сообщение о commit при запросе select. По умолчанию: `false`.
- `kafka_max_rows_per_message` - Максимальное количество строк записанных в одно сообщение Kafka для формата row-based. По умолчанию: `1`.
Примеры
@ -188,7 +203,10 @@ ClickHouse может поддерживать учетные данные Kerbe
- `_key` — ключ сообщения.
- `_offset` — оффсет сообщения.
- `_timestamp` — временная метка сообщения.
- `_timestamp_ms` — временная метка сообщения в миллисекундах.
- `_partition` — секция топика Kafka.
- `_headers.name` - Массив ключей заголовков сообщений.
- `_headers.value` - Массив значений заголовков сообщений.
**Смотрите также**

View File

@ -1315,6 +1315,12 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'
Чем меньше значение, тем чаще данные сбрасываются в таблицу. Установка слишком низкого значения приводит к снижению производительности.
## stream_poll_timeout_ms {#stream_poll_timeout_ms}
Таймаут для poll стриминга данных.
Значение по умолчанию: 500.
## load_balancing {#settings-load_balancing}
Задает алгоритм выбора реплик, используемый при обработке распределенных запросов.

View File

@ -649,8 +649,9 @@ void LocalServer::processConfig()
if (!config().has("only-system-tables"))
{
DatabaseCatalog::instance().createBackgroundTasks();
loadMetadata(global_context);
DatabaseCatalog::instance().loadDatabases();
DatabaseCatalog::instance().startupBackgroundCleanup();
}
/// For ClickHouse local if path is not set the loader will be disabled.

View File

@ -1652,11 +1652,12 @@ try
/// that may execute DROP before loadMarkedAsDroppedTables() in background,
/// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap.
database_catalog.loadMarkedAsDroppedTables();
database_catalog.createBackgroundTasks();
/// Then, load remaining databases
loadMetadata(global_context, default_database);
convertDatabasesEnginesIfNeed(global_context);
startupSystemTables();
database_catalog.loadDatabases();
database_catalog.startupBackgroundCleanup();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);
/// Load user-defined SQL functions.

View File

@ -121,7 +121,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
if (enable_tls_lc_str == "starttls")
params.enable_tls = LDAPClient::Params::TLSEnable::YES_STARTTLS;
else if (config.getBool(ldap_server_config + ".enable_tls"))
params.enable_tls = LDAPClient::Params::TLSEnable::YES; //-V1048
params.enable_tls = LDAPClient::Params::TLSEnable::YES;
else
params.enable_tls = LDAPClient::Params::TLSEnable::NO;
}
@ -140,7 +140,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
else if (tls_minimum_protocol_version_lc_str == "tls1.1")
params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_1;
else if (tls_minimum_protocol_version_lc_str == "tls1.2")
params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2; //-V1048
params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Bad value for 'tls_minimum_protocol_version' entry, allowed values are: "
@ -159,7 +159,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
else if (tls_require_cert_lc_str == "try")
params.tls_require_cert = LDAPClient::Params::TLSRequireCert::TRY;
else if (tls_require_cert_lc_str == "demand")
params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND; //-V1048
params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Bad value for 'tls_require_cert' entry, allowed values are: "

View File

@ -137,7 +137,7 @@ GrantedRoles::Elements GrantedRoles::getElements() const
boost::range::set_difference(roles, roles_with_admin_option, std::back_inserter(element.ids));
if (!element.empty())
{
element.admin_option = false; //-V1048
element.admin_option = false;
elements.emplace_back(std::move(element));
}

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
/** Tracks the leftmost and rightmost (x, y) data points.
*/
struct AggregateFunctionBoundingRatioData //-V730
struct AggregateFunctionBoundingRatioData
{
struct Point
{

View File

@ -221,7 +221,7 @@ private:
}
public:
AggregateFunctionHistogramData() //-V730
AggregateFunctionHistogramData()
: size(0)
, lower_bound(std::numeric_limits<Mean>::max())
, upper_bound(std::numeric_limits<Mean>::lowest())

View File

@ -481,7 +481,7 @@ struct Compatibility
/** For strings. Short strings are stored in the object itself, and long strings are allocated separately.
* NOTE It could also be suitable for arrays of numbers.
*/
struct SingleValueDataString //-V730
struct SingleValueDataString
{
private:
using Self = SingleValueDataString;

View File

@ -13,7 +13,9 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory & factory)
factory.registerFunction("nothing", [](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionNothing>(argument_types, parameters);
auto result_type = argument_types.empty() ? std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()) : argument_types.front();
return std::make_shared<AggregateFunctionNothing>(argument_types, parameters, result_type);
});
}

View File

@ -19,19 +19,14 @@ struct Settings;
class AggregateFunctionNothing final : public IAggregateFunctionHelper<AggregateFunctionNothing>
{
public:
AggregateFunctionNothing(const DataTypes & arguments, const Array & params)
: IAggregateFunctionHelper<AggregateFunctionNothing>(arguments, params, createResultType(arguments)) {}
AggregateFunctionNothing(const DataTypes & arguments, const Array & params, const DataTypePtr & result_type_)
: IAggregateFunctionHelper<AggregateFunctionNothing>(arguments, params, result_type_) {}
String getName() const override
{
return "nothing";
}
static DataTypePtr createResultType(const DataTypes & arguments)
{
return arguments.empty() ? std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()) : arguments.front();
}
bool allocatesMemoryInArena() const override { return false; }
void create(AggregateDataPtr __restrict) const override

View File

@ -72,11 +72,9 @@ public:
{
/// Currently the only functions that return not-NULL on all NULL arguments are count and uniq, and they return UInt64.
if (properties.returns_default_when_only_null)
return std::make_shared<AggregateFunctionNothing>(DataTypes{
std::make_shared<DataTypeUInt64>()}, params);
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
else
return std::make_shared<AggregateFunctionNothing>(DataTypes{
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>())}, params);
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()));
}
assert(nested_function);

View File

@ -190,7 +190,7 @@ public:
SequenceDirection seq_direction_,
size_t min_required_args_,
UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<SequenceNextNodeGeneralData<Node>, Self>({data_type_}, parameters_, data_type_)
: IAggregateFunctionDataHelper<SequenceNextNodeGeneralData<Node>, Self>(arguments, parameters_, data_type_)
, seq_base_kind(seq_base_kind_)
, seq_direction(seq_direction_)
, min_required_args(min_required_args_)

View File

@ -59,7 +59,7 @@ struct AggregateFunctionSumData
}
/// Vectorized version
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE
@ -107,12 +107,25 @@ struct AggregateFunctionSumData
void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
addManyImplAVX512BW(ptr, start, end);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
addManyImplAVX512F(ptr, start, end);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
addManyImplAVX2(ptr, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
addManyImplSSE42(ptr, start, end);
return;
@ -122,7 +135,7 @@ struct AggregateFunctionSumData
addManyImpl(ptr, start, end);
}
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(
template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
@ -198,12 +211,25 @@ struct AggregateFunctionSumData
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
addManyConditionalInternalImplAVX512BW<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
addManyConditionalInternalImplAVX512F<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
addManyConditionalInternalImplAVX2<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
addManyConditionalInternalImplSSE42<Value, add_if_zero>(ptr, condition_map, start, end);
return;

View File

@ -2,6 +2,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -25,8 +26,15 @@ public:
: assert_no_aggregates_place_message(std::move(assert_no_aggregates_place_message_))
{}
/// Create visitor with the given check-only flag.
/// In check-only mode, once an aggregate function has been found, visitImpl returns early
/// and needChildVisit returns false for the remaining nodes.
explicit CollectAggregateFunctionNodesVisitor(bool only_check_)
: only_check(only_check_)
{}
void visitImpl(const QueryTreeNodePtr & node)
{
if (only_check && has_aggregate_functions)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction())
return;
@ -39,16 +47,29 @@ public:
if (aggregate_function_nodes)
aggregate_function_nodes->push_back(node);
has_aggregate_functions = true;
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) const
{
return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
if (only_check && has_aggregate_functions)
return false;
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
/// Returns the has_aggregate_functions flag, which visitImpl sets to true when it processes an aggregate function node.
bool hasAggregateFunctions() const
{
return has_aggregate_functions;
}
private:
String assert_no_aggregates_place_message;
QueryTreeNodes * aggregate_function_nodes = nullptr;
bool only_check = false;
bool has_aggregate_functions = false;
};
}
@ -68,47 +89,23 @@ void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes
visitor.visit(node);
}
bool hasAggregateFunctionNodes(const QueryTreeNodePtr & node)
{
CollectAggregateFunctionNodesVisitor visitor(true /*only_check*/);
visitor.visit(node);
return visitor.hasAggregateFunctions();
}
void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message)
{
CollectAggregateFunctionNodesVisitor visitor(assert_no_aggregates_place_message);
visitor.visit(node);
}
namespace
void assertNoGroupingFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message)
{
class ValidateGroupingFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor<ValidateGroupingFunctionNodesVisitor>
{
public:
explicit ValidateGroupingFunctionNodesVisitor(String assert_no_grouping_function_place_message_)
: assert_no_grouping_function_place_message(std::move(assert_no_grouping_function_place_message_))
{}
void visitImpl(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (function_node && function_node->getFunctionName() == "grouping")
throw Exception(ErrorCodes::ILLEGAL_AGGREGATION,
"GROUPING function {} is found {} in query",
function_node->formatASTForErrorMessage(),
assert_no_grouping_function_place_message);
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
}
private:
String assert_no_grouping_function_place_message;
};
}
void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message)
{
ValidateGroupingFunctionNodesVisitor visitor(assert_no_grouping_function_place_message);
visitor.visit(node);
assertNoFunctionNodes(node, "grouping", ErrorCodes::ILLEGAL_AGGREGATION, "GROUPING", assert_no_grouping_function_place_message);
}
}

View File

@ -15,14 +15,19 @@ QueryTreeNodes collectAggregateFunctionNodes(const QueryTreeNodePtr & node);
*/
void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result);
/** Returns true if there are aggregate function nodes in node children, false otherwise.
* Do not visit subqueries.
*/
bool hasAggregateFunctionNodes(const QueryTreeNodePtr & node);
/** Assert that there are no aggregate function nodes in node children.
* Do not visit subqueries.
*/
void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message);
/** Assert that there are no GROUPING functions in node children.
/** Assert that there are no GROUPING function nodes in node children.
* Do not visit subqueries.
*/
void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message);
void assertNoGroupingFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message);
}

View File

@ -8,6 +8,8 @@
#include <Parsers/ASTIdentifier.h>
#include <Analyzer/TableNode.h>
namespace DB
{
@ -71,15 +73,7 @@ void ColumnNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t
bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const ColumnNode &>(rhs);
auto source = getColumnSourceOrNull();
auto rhs_source = rhs_typed.getColumnSourceOrNull();
if (source && !rhs_source)
return false;
if (!source && rhs_source)
return false;
return column == rhs_typed.column && (!source || source->isEqual(*rhs_source));
return column == rhs_typed.column;
}
void ColumnNode::updateTreeHashImpl(HashState & hash_state) const
@ -94,12 +88,37 @@ void ColumnNode::updateTreeHashImpl(HashState & hash_state) const
QueryTreeNodePtr ColumnNode::cloneImpl() const
{
return std::make_shared<ColumnNode>(column, getColumnSource());
return std::make_shared<ColumnNode>(column, getSourceWeakPointer());
}
ASTPtr ColumnNode::toASTImpl() const
{
return std::make_shared<ASTIdentifier>(column.name);
std::vector<std::string> column_identifier_parts;
auto column_source = getColumnSourceOrNull();
if (column_source)
{
auto node_type = column_source->getNodeType();
if (node_type == QueryTreeNodeType::TABLE ||
node_type == QueryTreeNodeType::TABLE_FUNCTION ||
node_type == QueryTreeNodeType::QUERY ||
node_type == QueryTreeNodeType::UNION)
{
if (column_source->hasAlias())
{
column_identifier_parts = {column_source->getAlias()};
}
else if (auto * table_node = column_source->as<TableNode>())
{
const auto & table_storage_id = table_node->getStorageID();
column_identifier_parts = {table_storage_id.getDatabaseName(), table_storage_id.getTableName()};
}
}
}
column_identifier_parts.push_back(column.name);
return std::make_shared<ASTIdentifier>(std::move(column_identifier_parts));
}
}

View File

@ -10,18 +10,19 @@
#include <DataTypes/FieldToDataType.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Interpreters/convertFieldToType.h>
namespace DB
{
ConstantNode::ConstantNode(ConstantValuePtr constant_value_, QueryTreeNodePtr source_expression)
ConstantNode::ConstantNode(ConstantValuePtr constant_value_, QueryTreeNodePtr source_expression_)
: IQueryTreeNode(children_size)
, constant_value(std::move(constant_value_))
, value_string(applyVisitor(FieldVisitorToString(), constant_value->getValue()))
{
children[source_child_index] = std::move(source_expression);
source_expression = std::move(source_expression_);
}
ConstantNode::ConstantNode(ConstantValuePtr constant_value_)
@ -71,12 +72,65 @@ void ConstantNode::updateTreeHashImpl(HashState & hash_state) const
QueryTreeNodePtr ConstantNode::cloneImpl() const
{
return std::make_shared<ConstantNode>(constant_value);
return std::make_shared<ConstantNode>(constant_value, source_expression);
}
ASTPtr ConstantNode::toASTImpl() const
{
return std::make_shared<ASTLiteral>(constant_value->getValue());
const auto & constant_value_literal = constant_value->getValue();
auto constant_value_ast = std::make_shared<ASTLiteral>(constant_value_literal);
bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());
switch (constant_value_literal_type)
{
case Field::Types::String:
{
need_to_add_cast_function = !constant_value_type.isString();
break;
}
case Field::Types::UInt64:
case Field::Types::Int64:
case Field::Types::Float64:
{
WhichDataType constant_value_field_type(applyVisitor(FieldToDataType(), constant_value_literal));
need_to_add_cast_function = constant_value_field_type.idx != constant_value_type.idx;
break;
}
case Field::Types::Int128:
case Field::Types::UInt128:
case Field::Types::Int256:
case Field::Types::UInt256:
case Field::Types::Decimal32:
case Field::Types::Decimal64:
case Field::Types::Decimal128:
case Field::Types::Decimal256:
case Field::Types::AggregateFunctionState:
case Field::Types::Array:
case Field::Types::Tuple:
case Field::Types::Map:
case Field::Types::UUID:
case Field::Types::Bool:
case Field::Types::Object:
case Field::Types::IPv4:
case Field::Types::IPv6:
case Field::Types::Null:
case Field::Types::CustomType:
{
need_to_add_cast_function = true;
break;
}
}
if (need_to_add_cast_function)
{
auto constant_type_name_ast = std::make_shared<ASTLiteral>(constant_value->getType()->getName());
return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast));
}
return constant_value_ast;
}
}

View File

@ -49,19 +49,19 @@ public:
/// Returns true if constant node has source expression, false otherwise
bool hasSourceExpression() const
{
return children[source_child_index] != nullptr;
return source_expression != nullptr;
}
/// Get source expression
const QueryTreeNodePtr & getSourceExpression() const
{
return children[source_child_index];
return source_expression;
}
/// Get source expression
QueryTreeNodePtr & getSourceExpression()
{
return children[source_child_index];
return source_expression;
}
QueryTreeNodeType getNodeType() const override
@ -88,9 +88,9 @@ protected:
private:
ConstantValuePtr constant_value;
String value_string;
QueryTreeNodePtr source_expression;
static constexpr size_t children_size = 1;
static constexpr size_t source_child_index = 0;
static constexpr size_t children_size = 0;
};
}

View File

@ -16,6 +16,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
extern const int LOGICAL_ERROR;
}
class IFunctionOverloadResolver;
@ -85,7 +86,10 @@ public:
/// Get arguments node
QueryTreeNodePtr & getArgumentsNode() { return children[arguments_child_index]; }
/// Get argument types
const DataTypes & getArgumentTypes() const;
/// Get argument columns
ColumnsWithTypeAndName getArgumentColumns() const;
/// Returns true if function node has window, false otherwise
@ -104,8 +108,8 @@ public:
*/
QueryTreeNodePtr & getWindowNode() { return children[window_child_index]; }
/** Get non aggregate function.
* If function is not resolved nullptr returned.
/** Get ordinary function.
* If function is not resolved or is resolved as non ordinary function nullptr is returned.
*/
FunctionBasePtr getFunction() const
{
@ -114,6 +118,19 @@ public:
return std::static_pointer_cast<const IFunctionBase>(function);
}
/** Get ordinary function.
  * If function node kind is not ORDINARY, LOGICAL_ERROR exception is thrown.
  */
FunctionBasePtr getFunctionOrThrow() const
{
    if (kind == FunctionKind::ORDINARY)
        return std::static_pointer_cast<const IFunctionBase>(function);

    throw Exception(ErrorCodes::LOGICAL_ERROR,
        "Function node with name '{}' is not resolved as ordinary function",
        function_name);
}
/** Get aggregate function.
* If function is not resolved nullptr returned.
* If function is resolved as non aggregate function nullptr returned.

View File

@ -16,11 +16,11 @@ struct QueryTreeNodeWithHash
{
QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT
: node(std::move(node_))
, hash(node->getTreeHash().first)
, hash(node->getTreeHash())
{}
QueryTreeNodePtrType node = nullptr;
size_t hash = 0;
std::pair<UInt64, UInt64> hash;
};
template <typename T>
@ -55,6 +55,6 @@ struct std::hash<DB::QueryTreeNodeWithHash<T>>
{
size_t operator()(const DB::QueryTreeNodeWithHash<T> & node_with_hash) const
{
return node_with_hash.hash;
return node_with_hash.hash.first;
}
};

View File

@ -74,8 +74,11 @@ struct NodePairHash
}
bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options) const
{
if (this == &rhs)
return true;
std::vector<NodePair> nodes_to_process;
std::unordered_set<NodePair, NodePairHash> equals_pairs;
@ -89,19 +92,25 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
const auto * lhs_node_to_compare = nodes_to_compare.first;
const auto * rhs_node_to_compare = nodes_to_compare.second;
if (equals_pairs.contains(std::make_pair(lhs_node_to_compare, rhs_node_to_compare)))
continue;
assert(lhs_node_to_compare);
assert(rhs_node_to_compare);
if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() ||
lhs_node_to_compare->alias != rhs_node_to_compare->alias ||
!lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare))
if (equals_pairs.contains(std::make_pair(lhs_node_to_compare, rhs_node_to_compare)))
continue;
if (lhs_node_to_compare == rhs_node_to_compare)
{
return false;
equals_pairs.emplace(lhs_node_to_compare, rhs_node_to_compare);
continue;
}
if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() ||
!lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare))
return false;
if (compare_options.compare_aliases && lhs_node_to_compare->alias != rhs_node_to_compare->alias)
return false;
const auto & lhs_children = lhs_node_to_compare->children;
const auto & rhs_children = rhs_node_to_compare->children;
@ -155,26 +164,39 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
{
/** Compute tree hash with this node as root.
*
* Some nodes can contain weak pointers to other nodes. Such weak nodes are not necessarily
* part of the tree that we try to hash, but we need to update hash state with their content.
*
* Algorithm
* For each node in the tree we update hash state with its content.
* Weak nodes are handled specially. When we visit a weak node for the first time, we update hash state with the weak node content and register
* an identifier for this node; on subsequent visits of this weak node we hash the weak node identifier instead of its content.
*/
HashState hash_state;
std::unordered_map<const IQueryTreeNode *, size_t> node_to_identifier;
std::unordered_map<const IQueryTreeNode *, size_t> weak_node_to_identifier;
std::vector<const IQueryTreeNode *> nodes_to_process;
nodes_to_process.push_back(this);
std::vector<std::pair<const IQueryTreeNode *, bool>> nodes_to_process;
nodes_to_process.emplace_back(this, false);
while (!nodes_to_process.empty())
{
const auto * node_to_process = nodes_to_process.back();
const auto [node_to_process, is_weak_node] = nodes_to_process.back();
nodes_to_process.pop_back();
auto node_identifier_it = node_to_identifier.find(node_to_process);
if (node_identifier_it != node_to_identifier.end())
if (is_weak_node)
{
hash_state.update(node_identifier_it->second);
continue;
}
auto node_identifier_it = weak_node_to_identifier.find(node_to_process);
if (node_identifier_it != weak_node_to_identifier.end())
{
hash_state.update(node_identifier_it->second);
continue;
}
node_to_identifier.emplace(node_to_process, node_to_identifier.size());
weak_node_to_identifier.emplace(node_to_process, weak_node_to_identifier.size());
}
hash_state.update(static_cast<size_t>(node_to_process->getNodeType()));
if (!node_to_process->alias.empty())
@ -192,7 +214,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
if (!node_to_process_child)
continue;
nodes_to_process.push_back(node_to_process_child.get());
nodes_to_process.emplace_back(node_to_process_child.get(), false);
}
hash_state.update(node_to_process->weak_pointers.size());
@ -203,7 +225,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
if (!strong_pointer)
continue;
nodes_to_process.push_back(strong_pointer.get());
nodes_to_process.emplace_back(strong_pointer.get(), true);
}
}
@ -245,12 +267,15 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
auto node_clone = it != replacement_map.end() ? it->second : node_to_clone->cloneImpl();
*place_for_cloned_node = node_clone;
old_pointer_to_new_pointer.emplace(node_to_clone, node_clone);
if (it != replacement_map.end())
continue;
node_clone->setAlias(node_to_clone->alias);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;
old_pointer_to_new_pointer.emplace(node_to_clone, node_clone);
for (auto & child : node_clone->children)
{
if (!child)
@ -291,6 +316,14 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
return result_cloned_node_place;
}
QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const QueryTreeNodePtr & node_to_replace, QueryTreeNodePtr replacement_node) const
{
    /// Build a replacement map with a single entry and delegate to the map based overload.
    ReplacementMap single_node_replacement;
    single_node_replacement.emplace(node_to_replace.get(), std::move(replacement_node));

    return cloneAndReplace(single_node_replacement);
}
ASTPtr IQueryTreeNode::toAST() const
{
auto converted_node = toASTImpl();

View File

@ -90,12 +90,17 @@ public:
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for {} query node", getNodeTypeName());
}
/// Options that control query tree comparison.
struct CompareOptions
{
    /// Whether node aliases take part in the comparison. Enabled by default.
    bool compare_aliases{true};
};
/** Is tree equal to other tree with node root.
*
* Aliases of query tree nodes are compared during isEqual call.
* With default compare options aliases of query tree nodes are compared during isEqual call.
* Original ASTs of query tree nodes are not compared during isEqual call.
*/
bool isEqual(const IQueryTreeNode & rhs) const;
bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true }) const;
using Hash = std::pair<UInt64, UInt64>;
using HashState = SipHash;
@ -117,6 +122,11 @@ public:
using ReplacementMap = std::unordered_map<const IQueryTreeNode *, QueryTreeNodePtr>;
QueryTreeNodePtr cloneAndReplace(const ReplacementMap & replacement_map) const;
/** Get a deep copy of the query tree.
* If a node being cloned is the node to replace, the replacement node is used instead of its clone.
*/
QueryTreeNodePtr cloneAndReplace(const QueryTreeNodePtr & node_to_replace, QueryTreeNodePtr replacement_node) const;
/// Returns true if node has alias, false otherwise
bool hasAlias() const
{

View File

@ -82,6 +82,8 @@ ASTPtr LambdaNode::toASTImpl() const
lambda_function_ast->children.push_back(std::move(lambda_function_arguments_ast));
lambda_function_ast->arguments = lambda_function_ast->children.back();
lambda_function_ast->is_lambda_function = true;
return lambda_function_ast;
}

View File

@ -17,6 +17,7 @@ class ListNode final : public IQueryTreeNode
{
public:
using iterator = QueryTreeNodes::iterator;
using const_iterator = QueryTreeNodes::const_iterator;
/// Initialize list node with empty nodes
ListNode();
@ -44,7 +45,10 @@ public:
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
iterator begin() { return children.begin(); }
const_iterator begin() const { return children.begin(); }
iterator end() { return children.end(); }
const_iterator end() const { return children.end(); }
protected:
bool isEqualImpl(const IQueryTreeNode & rhs) const override;

View File

@ -209,12 +209,14 @@ ASTPtr MatcherNode::toASTImpl() const
ASTPtr result;
ASTPtr transformers;
if (!children.empty())
const auto & column_transformers = getColumnTransformers().getNodes();
if (!column_transformers.empty())
{
transformers = std::make_shared<ASTColumnsTransformerList>();
for (const auto & child : children)
transformers->children.push_back(child->toAST());
for (const auto & column_transformer : column_transformers)
transformers->children.push_back(column_transformer->toAST());
}
if (matcher_type == MatcherNodeType::ASTERISK)

View File

@ -99,6 +99,9 @@ public:
const auto * left_argument_constant_node = arithmetic_function_arguments_nodes[0]->as<ConstantNode>();
const auto * right_argument_constant_node = arithmetic_function_arguments_nodes[1]->as<ConstantNode>();
if (!left_argument_constant_node && !right_argument_constant_node)
return;
/** If we extract negative constant, aggregate function name must be updated.
*
* Example: SELECT min(-1 * id);

View File

@ -0,0 +1,210 @@
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
namespace DB
{
namespace
{
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor>
{
public:
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_)
: context(std::move(context_))
{}
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{
return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
}
void visitImpl(QueryTreeNodePtr & node) const
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
const auto & comparison_function_name = function_node->getFunctionName();
if (comparison_function_name != "equals" && comparison_function_name != "notEquals")
return;
const auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 2)
return;
const auto & lhs_argument = arguments[0];
const auto & lhs_argument_result_type = lhs_argument->getResultType();
if (!isTuple(lhs_argument_result_type))
return;
const auto & rhs_argument = arguments[1];
const auto & rhs_argument_result_type = rhs_argument->getResultType();
if (!isTuple(rhs_argument_result_type))
return;
auto lhs_argument_node_type = lhs_argument->getNodeType();
auto rhs_argument_node_type = rhs_argument->getNodeType();
if (lhs_argument_node_type == QueryTreeNodeType::FUNCTION && rhs_argument_node_type == QueryTreeNodeType::FUNCTION)
tryOptimizeComparisonTupleFunctions(node, lhs_argument, rhs_argument, comparison_function_name);
else if (lhs_argument_node_type == QueryTreeNodeType::FUNCTION && rhs_argument_node_type == QueryTreeNodeType::CONSTANT)
tryOptimizeComparisonTupleFunctionAndConstant(node, lhs_argument, rhs_argument, comparison_function_name);
else if (lhs_argument_node_type == QueryTreeNodeType::CONSTANT && rhs_argument_node_type == QueryTreeNodeType::FUNCTION)
tryOptimizeComparisonTupleFunctionAndConstant(node, rhs_argument, lhs_argument, comparison_function_name);
}
private:
void tryOptimizeComparisonTupleFunctions(QueryTreeNodePtr & node,
const QueryTreeNodePtr & lhs_function_node,
const QueryTreeNodePtr & rhs_function_node,
const std::string & comparison_function_name) const
{
const auto & lhs_function_node_typed = lhs_function_node->as<FunctionNode &>();
if (lhs_function_node_typed.getFunctionName() != "tuple")
return;
const auto & rhs_function_node_typed = rhs_function_node->as<FunctionNode &>();
if (rhs_function_node_typed.getFunctionName() != "tuple")
return;
const auto & lhs_tuple_function_arguments_nodes = lhs_function_node_typed.getArguments().getNodes();
size_t lhs_tuple_function_arguments_nodes_size = lhs_tuple_function_arguments_nodes.size();
const auto & rhs_tuple_function_arguments_nodes = rhs_function_node_typed.getArguments().getNodes();
if (lhs_tuple_function_arguments_nodes_size != rhs_tuple_function_arguments_nodes.size())
return;
if (lhs_tuple_function_arguments_nodes_size == 1)
{
node = makeComparisonFunction(lhs_tuple_function_arguments_nodes[0], rhs_tuple_function_arguments_nodes[0], comparison_function_name);
return;
}
QueryTreeNodes tuple_arguments_equals_functions;
tuple_arguments_equals_functions.reserve(lhs_tuple_function_arguments_nodes_size);
for (size_t i = 0; i < lhs_tuple_function_arguments_nodes_size; ++i)
{
auto equals_function = makeEqualsFunction(lhs_tuple_function_arguments_nodes[i], rhs_tuple_function_arguments_nodes[i]);
tuple_arguments_equals_functions.push_back(std::move(equals_function));
}
node = makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name);
}
void tryOptimizeComparisonTupleFunctionAndConstant(QueryTreeNodePtr & node,
const QueryTreeNodePtr & function_node,
const QueryTreeNodePtr & constant_node,
const std::string & comparison_function_name) const
{
const auto & function_node_typed = function_node->as<FunctionNode &>();
if (function_node_typed.getFunctionName() != "tuple")
return;
auto & constant_node_typed = constant_node->as<ConstantNode &>();
const auto & constant_node_value = constant_node_typed.getValue();
if (constant_node_value.getType() != Field::Types::Which::Tuple)
return;
const auto & constant_tuple = constant_node_value.get<const Tuple &>();
const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();
if (function_arguments_nodes_size != constant_tuple.size())
return;
auto constant_node_result_type = constant_node_typed.getResultType();
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(constant_node_result_type.get());
if (!tuple_data_type)
return;
const auto & tuple_data_type_elements = tuple_data_type->getElements();
if (tuple_data_type_elements.size() != function_arguments_nodes_size)
return;
if (function_arguments_nodes_size == 1)
{
auto comparison_argument_constant_value = std::make_shared<ConstantValue>(constant_tuple[0], tuple_data_type_elements[0]);
auto comparison_argument_constant_node = std::make_shared<ConstantNode>(std::move(comparison_argument_constant_value));
node = makeComparisonFunction(function_arguments_nodes[0], std::move(comparison_argument_constant_node), comparison_function_name);
return;
}
QueryTreeNodes tuple_arguments_equals_functions;
tuple_arguments_equals_functions.reserve(function_arguments_nodes_size);
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
auto equals_argument_constant_value = std::make_shared<ConstantValue>(constant_tuple[i], tuple_data_type_elements[i]);
auto equals_argument_constant_node = std::make_shared<ConstantNode>(std::move(equals_argument_constant_value));
auto equals_function = makeEqualsFunction(function_arguments_nodes[i], std::move(equals_argument_constant_node));
tuple_arguments_equals_functions.push_back(std::move(equals_function));
}
node = makeEquivalentTupleComparisonFunction(std::move(tuple_arguments_equals_functions), comparison_function_name);
}
QueryTreeNodePtr makeEquivalentTupleComparisonFunction(QueryTreeNodes tuple_arguments_equals_functions,
const std::string & comparison_function_name) const
{
auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName());
if (comparison_function_name == "notEquals")
{
auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName());
result_function = std::move(not_function);
}
return result_function;
}
inline QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const
{
return makeComparisonFunction(std::move(lhs_argument), std::move(rhs_argument), "equals");
}
/** Build function node named comparison_function_name with two arguments (lhs first, rhs second)
  * and resolve it as an ordinary function.
  */
QueryTreeNodePtr makeComparisonFunction(QueryTreeNodePtr lhs_argument,
    QueryTreeNodePtr rhs_argument,
    const std::string & comparison_function_name) const
{
    auto function_node = std::make_shared<FunctionNode>(comparison_function_name);

    auto & argument_nodes = function_node->getArguments().getNodes();
    argument_nodes.push_back(std::move(lhs_argument));
    argument_nodes.push_back(std::move(rhs_argument));

    resolveOrdinaryFunctionNode(*function_node, function_node->getFunctionName());
    return function_node;
}
/** Get function with name function_name from FunctionFactory using visitor context,
  * build it for the current argument columns of function_node and resolve the node as that function.
  */
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
    auto function_resolver = FunctionFactory::instance().get(function_name, context);
    auto function_base = function_resolver->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(function_base));
}
ContextPtr context;
};
}
/// Run ComparisonTupleEliminationPassVisitor, constructed with the given context, over the query tree.
void ComparisonTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    ComparisonTupleEliminationPassVisitor tuple_elimination_visitor(std::move(context));
    tuple_elimination_visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,24 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Query tree pass that replaces a comparison of tuples with comparisons of the tuples arguments.
  *
  * Example: SELECT id FROM test_table WHERE (id, value) = (1, 'Value');
  * Result: SELECT id FROM test_table WHERE id = 1 AND value = 'Value';
  */
class ComparisonTupleEliminationPass final : public IQueryTreePass
{
public:
    /// Name of the pass
    String getName() override
    {
        return "ComparisonTupleEliminationPass";
    }

    /// Short human readable description of the pass
    String getDescription() override
    {
        return "Rewrite tuples comparison into equivalent comparison of tuples arguments";
    }

    /// Apply the pass to query tree node
    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -38,22 +38,6 @@ public:
{
auto count_distinct_implementation_function_name = String(settings.count_distinct_implementation);
/// Replace countDistinct with countDistinct implementation
if (function_name_lowercase == "countdistinct")
{
resolveAggregateOrWindowFunctionNode(*function_node, count_distinct_implementation_function_name);
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
}
/// Replace countIfDistinct with countDistinctIf implementation
if (function_name_lowercase == "countifdistinct")
{
resolveAggregateOrWindowFunctionNode(*function_node, count_distinct_implementation_function_name + "If");
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
}
/// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal
if (function_name_lowercase.ends_with("ifdistinct"))
{
@ -64,19 +48,6 @@ public:
function_name_lowercase = Poco::toLower(function_name);
}
/// Rewrite all aggregate functions to add -OrNull suffix to them
if (settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"))
{
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(function_name);
if (function_properies && !function_properies->returns_default_when_only_null)
{
auto updated_function_name = function_name + "OrNull";
resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name);
function_name = function_node->getFunctionName();
function_name_lowercase = Poco::toLower(function_name);
}
}
/** Move -OrNull suffix ahead, this should execute after add -OrNull suffix.
* Used to rewrite aggregate functions with -OrNull suffix in some cases.
* Example: sumIfOrNull.

View File

@ -2,6 +2,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <Storages/IStorage.h>
@ -181,7 +182,7 @@ public:
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
column.name += ".keys";
column.type = data_type_map.getKeyType();
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);

View File

@ -149,8 +149,9 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
/// It is expected by execution layer that if there are only 1 grouping set it will be removed
if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.getGroupBy().getNodes().size() == 1)
{
auto & grouping_set_list_node = query_node_typed.getGroupBy().getNodes().front()->as<ListNode &>();
query_node_typed.getGroupBy().getNodes() = std::move(grouping_set_list_node.getNodes());
auto grouping_set_list_node = query_node_typed.getGroupBy().getNodes().front();
auto & grouping_set_list_node_typed = grouping_set_list_node->as<ListNode &>();
query_node_typed.getGroupBy().getNodes() = std::move(grouping_set_list_node_typed.getNodes());
query_node_typed.setIsGroupByWithGroupingSets(false);
}

View File

@ -18,8 +18,11 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<OptimizeGroupByFunctionKeysVisitor>;
using Base::Base;
static bool needChildVisit(QueryTreeNodePtr & /*parent*/, QueryTreeNodePtr & child)
static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child)
{
if (parent->getNodeType() == QueryTreeNodeType::TABLE_FUNCTION)
return false;
return !child->as<FunctionNode>();
}
@ -62,7 +65,7 @@ private:
std::vector<NodeWithInfo> candidates;
auto & function_arguments = function->getArguments().getNodes();
bool is_deterministic = function->getFunction()->isDeterministicInScopeOfQuery();
bool is_deterministic = function->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it)
candidates.push_back({ *it, is_deterministic });
@ -86,7 +89,8 @@ private:
if (!found)
{
bool is_deterministic_function = parents_are_only_deterministic && function->getFunction()->isDeterministicInScopeOfQuery();
bool is_deterministic_function = parents_are_only_deterministic &&
function->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
candidates.push_back({ *it, is_deterministic_function });
}

File diff suppressed because it is too large Load Diff

View File

@ -51,6 +51,8 @@ namespace DB
* Function `untuple` is handled properly.
* Function `arrayJoin` is handled properly.
* For functions `dictGet` and its variations and for function `joinGet` identifier as first argument is handled properly.
* Replace `countDistinct` and `countIfDistinct` aggregate functions using setting count_distinct_implementation.
* Add -OrNull suffix to aggregate functions if setting aggregate_functions_null_for_empty is true.
* Function `exists` is converted into `in`.
*
* For function `grouping` arguments are resolved, but it is planner responsibility to initialize it with concrete grouping function

View File

@ -23,6 +23,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
QueryNode::QueryNode(ContextMutablePtr context_, SettingsChanges settings_changes_)
: IQueryTreeNode(children_size)
, context(std::move(context_))
@ -268,7 +273,24 @@ ASTPtr QueryNode::toASTImpl() const
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
select_query->setExpression(ASTSelectQuery::Expression::SELECT, getProjection().toAST());
auto projection_ast = getProjection().toAST();
auto & projection_expression_list_ast = projection_ast->as<ASTExpressionList &>();
size_t projection_expression_list_ast_children_size = projection_expression_list_ast.children.size();
if (projection_expression_list_ast_children_size != getProjection().getNodes().size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query node invalid projection conversion to AST");
if (!projection_columns.empty())
{
for (size_t i = 0; i < projection_expression_list_ast_children_size; ++i)
{
auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(projection_expression_list_ast.children[i].get());
if (ast_with_alias)
ast_with_alias->setAlias(projection_columns[i].name);
}
}
select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_ast));
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree());
@ -314,6 +336,7 @@ ASTPtr QueryNode::toASTImpl() const
{
auto settings_query = std::make_shared<ASTSetQuery>();
settings_query->changes = settings_changes;
settings_query->is_standalone = false;
select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings_query));
}

View File

@ -13,6 +13,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTQueryParameter.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ASTColumnsMatcher.h>
@ -61,6 +62,7 @@ namespace ErrorCodes
extern const int EXPECTED_ALL_OR_ANY;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_QUERY_PARAMETER;
}
namespace
@ -438,6 +440,11 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
auto identifier = Identifier(ast_identifier->name_parts);
result = std::make_shared<IdentifierNode>(std::move(identifier));
}
else if (const auto * table_identifier = expression->as<ASTTableIdentifier>())
{
auto identifier = Identifier(table_identifier->name_parts);
result = std::make_shared<IdentifierNode>(std::move(identifier));
}
else if (const auto * asterisk = expression->as<ASTAsterisk>())
{
auto column_transformers = buildColumnTransformers(asterisk->transformers, context);
@ -535,6 +542,11 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
result = std::move(query_node);
}
else if (const auto * select_with_union_query = expression->as<ASTSelectWithUnionQuery>())
{
auto query_node = buildSelectWithUnionExpression(expression, false /*is_subquery*/, {} /*cte_name*/, context);
result = std::move(query_node);
}
else if (const auto * with_element = expression->as<ASTWithElement>())
{
auto with_element_subquery = with_element->subquery->as<ASTSubquery &>().children.at(0);
@ -583,6 +595,12 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
auto column_transformers = buildColumnTransformers(qualified_columns_list_matcher->transformers, context);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), std::move(column_list_identifiers), std::move(column_transformers));
}
else if (const auto * query_parameter = expression->as<ASTQueryParameter>())
{
throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER,
"Query parameter {} was not set",
backQuote(query_parameter->name));
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,

View File

@ -8,6 +8,7 @@
#include <IO/Operators.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Interpreters/Context.h>
@ -36,6 +37,7 @@
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
#include <Analyzer/Passes/GroupingFunctionsResolvePass.h>
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
namespace DB
{
@ -61,6 +63,14 @@ public:
: pass_name(std::move(pass_name_))
{}
/// Children are visited for every parent except table function nodes.
static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType &)
{
    return parent->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
}
void visitImpl(QueryTreeNodePtr & node) const
{
if (auto * column = node->as<ColumnNode>())
@ -105,14 +115,22 @@ private:
if (WhichDataType(expected_argument_types[i]).isFunction())
continue;
if (!expected_argument_types[i]->equals(*actual_argument_columns[i].type))
const auto & expected_argument_type = expected_argument_types[i];
const auto & actual_argument_type = actual_argument_columns[i].type;
if (!expected_argument_type->equals(*actual_argument_type))
{
/// Aggregate functions remove low cardinality for their argument types
if ((function->isAggregateFunction() || function->isWindowFunction()) &&
expected_argument_type->equals(*recursiveRemoveLowCardinality(actual_argument_type)))
continue;
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} expects {} argument to have {} type but receives {} after running {} pass",
function->toAST()->formatForErrorMessage(),
i + 1,
expected_argument_types[i]->getName(),
actual_argument_columns[i].type->getName(),
expected_argument_type->getName(),
actual_argument_type->getName(),
pass_name);
}
}
@ -231,6 +249,8 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<IfConstantConditionPass>());
manager.addPass(std::make_unique<IfChainToMultiIfPass>());
manager.addPass(std::make_unique<ComparisonTupleEliminationPass>());
manager.addPass(std::make_unique<OptimizeRedundantFunctionsInOrderByPass>());
manager.addPass(std::make_unique<OrderByTupleEliminationPass>());

View File

@ -170,7 +170,7 @@ SetPtr makeSetForConstantValue(const DataTypePtr & expression_type, const Field
value_type->getName());
}
auto set = std::make_shared<Set>(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in);
auto set = std::make_shared<Set>(size_limits_for_set, true /*fill_set_elements*/, tranform_null_in);
set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName());
set->insertFromBlock(result_block.getColumnsWithTypeAndName());

View File

@ -26,6 +26,13 @@ TableNode::TableNode(StoragePtr storage_, TableLockHolder storage_lock_, Storage
{
}
/** Construct table node from storage and context.
  *
  * Delegates to the (storage, storage lock, storage snapshot) constructor:
  * - the lock is taken with lockForShare using the context initial query id and the lock_acquire_timeout setting;
  * - the snapshot is requested from the storage for its in-memory metadata and the given context.
  */
TableNode::TableNode(StoragePtr storage_, const ContextPtr & context)
    : TableNode(storage_,
        storage_->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout),
        storage_->getStorageSnapshot(storage_->getInMemoryMetadataPtr(), context))
{
}
}
void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
buffer << std::string(indent, ' ') << "TABLE id: " << format_state.getNodeId(this);

View File

@ -29,6 +29,9 @@ public:
/// Construct table node with storage, storage lock, storage snapshot
explicit TableNode(StoragePtr storage_, TableLockHolder storage_lock_, StorageSnapshotPtr storage_snapshot_);
/// Construct table node with storage, context
explicit TableNode(StoragePtr storage_, const ContextPtr & context);
/// Get storage
const StoragePtr & getStorage() const
{

View File

@ -148,6 +148,16 @@ ASTPtr UnionNode::toASTImpl() const
select_with_union_query->children.push_back(getQueriesNode()->toAST());
select_with_union_query->list_of_selects = select_with_union_query->children.back();
if (is_subquery)
{
auto subquery = std::make_shared<ASTSubquery>();
subquery->cte_name = cte_name;
subquery->children.push_back(std::move(select_with_union_query));
return subquery;
}
return select_with_union_query;
}

View File

@ -10,10 +10,12 @@
#include <Functions/FunctionHelpers.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/QueryNode.h>
@ -90,6 +92,8 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Identifier for table expression must contain 1 or 2 parts. Actual '{}'",
identifier.getFullName());
table_expression_node_ast->setAlias(identifier_node.getAlias());
}
else
{
@ -242,6 +246,58 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node)
return result;
}
/** Walk the join tree down its left side until a table expression is reached.
  *
  * TABLE, QUERY, UNION and TABLE_FUNCTION nodes are returned as is.
  * For ARRAY_JOIN the walk continues into its table expression, for JOIN into its left table expression.
  * Any other node type results in LOGICAL_ERROR exception.
  */
QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node)
{
    QueryTreeNodePtr current_node = join_tree_node;

    while (true)
    {
        switch (current_node->getNodeType())
        {
            case QueryTreeNodeType::TABLE:
            case QueryTreeNodeType::QUERY:
            case QueryTreeNodeType::UNION:
            case QueryTreeNodeType::TABLE_FUNCTION:
                return current_node;
            case QueryTreeNodeType::ARRAY_JOIN:
            {
                /// Copy child pointer first, so current node stays alive while it is being replaced
                QueryTreeNodePtr next_node = current_node->as<ArrayJoinNode &>().getTableExpression();
                current_node = std::move(next_node);
                break;
            }
            case QueryTreeNodeType::JOIN:
            {
                QueryTreeNodePtr next_node = current_node->as<JoinNode &>().getLeftTableExpression();
                current_node = std::move(next_node);
                break;
            }
            default:
                throw Exception(ErrorCodes::LOGICAL_ERROR,
                    "Unexpected node type for table expression. "
                    "Expected table, table function, query, union, join or array join. Actual {}",
                    current_node->getNodeTypeName());
        }
    }
}
namespace
{
@ -320,4 +376,105 @@ bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, Identifier
return true;
}
namespace
{
/** Visitor that records whether a function node with the specified function name was visited.
  * Children with QUERY or UNION node type are not visited. After a match no more children are visited.
  */
class CheckFunctionExistsVisitor : public ConstInDepthQueryTreeVisitor<CheckFunctionExistsVisitor>
{
public:
    explicit CheckFunctionExistsVisitor(std::string_view function_name_)
        : function_name(function_name_)
    {}

    void visitImpl(const QueryTreeNodePtr & node)
    {
        /// Once the function is found the result can not change
        if (has_function)
            return;

        if (auto * function_node = node->as<FunctionNode>())
            has_function = (function_node->getFunctionName() == function_name);
    }

    bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) const
    {
        if (has_function)
            return false;

        const auto child_type = child_node->getNodeType();
        return child_type != QueryTreeNodeType::QUERY && child_type != QueryTreeNodeType::UNION;
    }

    /// True if function node with the searched name was visited
    bool hasFunction() const
    {
        return has_function;
    }

private:
    std::string_view function_name;
    bool has_function = false;
};
}
bool hasFunctionNode(const QueryTreeNodePtr & node, std::string_view function_name)
{
CheckFunctionExistsVisitor visitor(function_name);
visitor.visit(node);
return visitor.hasFunction();
}
namespace
{
/** Visitor that replaces column nodes whose source is the specified table expression node
  * with the node registered for the column name in column_name_to_node.
  * Columns without an entry in the map are left untouched.
  * Children with QUERY or UNION node type are not visited.
  */
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
{
public:
    explicit ReplaceColumnsVisitor(const QueryTreeNodePtr & table_expression_node_,
        const std::unordered_map<std::string, QueryTreeNodePtr> & column_name_to_node_)
        : table_expression_node(table_expression_node_)
        , column_name_to_node(column_name_to_node_)
    {}

    void visitImpl(QueryTreeNodePtr & node)
    {
        auto * column_node = node->as<ColumnNode>();
        if (!column_node || column_node->getColumnSourceOrNull() != table_expression_node)
            return;

        auto replacement_it = column_name_to_node.find(column_node->getColumnName());
        if (replacement_it != column_name_to_node.end())
            node = replacement_it->second;
    }

    static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
    {
        const auto child_type = child_node->getNodeType();
        return child_type != QueryTreeNodeType::QUERY && child_type != QueryTreeNodeType::UNION;
    }

private:
    QueryTreeNodePtr table_expression_node;
    const std::unordered_map<std::string, QueryTreeNodePtr> & column_name_to_node;
};
}
/// Run ReplaceColumnsVisitor for the given table expression node and replacement map over node.
void replaceColumns(QueryTreeNodePtr & node,
    const QueryTreeNodePtr & table_expression_node,
    const std::unordered_map<std::string, QueryTreeNodePtr> & column_name_to_node)
{
    ReplaceColumnsVisitor replace_columns_visitor(table_expression_node, column_name_to_node);
    replace_columns_visitor.visit(node);
}
}

View File

@ -19,6 +19,9 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
/// Extract table, table function, query, union from join tree
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);
/// Extract left table expression from join tree
QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node);
/** Build table expressions stack that consists of table, table function, query, union, join, array join from join tree.
*
* Example: SELECT * FROM t1 INNER JOIN t2 INNER JOIN t3.
@ -39,4 +42,27 @@ QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_nod
*/
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.
*/
void assertNoFunctionNodes(const QueryTreeNodePtr & node,
std::string_view function_name,
int exception_code,
std::string_view exception_function_name,
std::string_view exception_place_message);
/** Returns true if there is function node with specified function name in node children, false otherwise.
* Do not visit subqueries.
*/
bool hasFunctionNode(const QueryTreeNodePtr & node, std::string_view function_name);
/** Replace columns in node children.
* If there is column node and its source is specified table expression node and there is
* node for column name in map replace column node with node from map.
* Do not visit subqueries.
*/
void replaceColumns(QueryTreeNodePtr & node,
const QueryTreeNodePtr & table_expression_node,
const std::unordered_map<std::string, QueryTreeNodePtr> & column_name_to_node);
}

View File

@ -0,0 +1,282 @@
#include <Analyzer/ValidationUtils.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/AggregationUtils.h>
#include <Analyzer/WindowFunctionsUtils.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_AN_AGGREGATE;
extern const int NOT_IMPLEMENTED;
}
/** Visitor that checks that visited expressions use only GROUP BY keys and aggregate functions.
  *
  * Throws NOT_AN_AGGREGATE if:
  * - `grouping` function has an argument that is not equal to any GROUP BY key;
  * - a column is visited whose source is not a lambda, and it was not covered by an
  *   aggregate function or GROUP BY key higher in the tree.
  *
  * CONSTANT, SORT and INTERPOLATE nodes themselves are not validated.
  * Children of aggregate functions and of nodes equal to GROUP BY keys are not visited,
  * as well as children with QUERY or UNION node type.
  */
class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor<ValidateGroupByColumnsVisitor>
{
public:
    explicit ValidateGroupByColumnsVisitor(const QueryTreeNodes & group_by_keys_nodes_, const QueryTreeNodePtr & query_node_)
        : group_by_keys_nodes(group_by_keys_nodes_)
        , query_node(query_node_)
    {}

    void visitImpl(const QueryTreeNodePtr & node)
    {
        const auto node_type = node->getNodeType();
        const bool is_not_validated_node_type = node_type == QueryTreeNodeType::CONSTANT
            || node_type == QueryTreeNodeType::SORT
            || node_type == QueryTreeNodeType::INTERPOLATE;

        if (is_not_validated_node_type || nodeIsAggregateFunctionOrInGroupByKeys(node))
            return;

        if (auto * function_node = node->as<FunctionNode>(); function_node && function_node->getFunctionName() == "grouping")
        {
            /// Every argument of GROUPING function must be one of GROUP BY keys
            for (auto & grouping_argument_node : function_node->getArguments().getNodes())
            {
                if (!groupingArgumentIsInGroupByKeys(grouping_argument_node))
                    throw Exception(ErrorCodes::NOT_AN_AGGREGATE,
                        "GROUPING function argument {} is not in GROUP BY keys. In query {}",
                        grouping_argument_node->formatASTForErrorMessage(),
                        query_node->formatASTForErrorMessage());
            }

            return;
        }

        auto * column_node = node->as<ColumnNode>();
        if (!column_node)
            return;

        /// Columns that belong to lambda are not validated
        if (column_node->getColumnSource()->getNodeType() == QueryTreeNodeType::LAMBDA)
            return;

        throw Exception(ErrorCodes::NOT_AN_AGGREGATE,
            "Column {} is not under aggregate function and not in GROUP BY keys. In query {}",
            column_node->formatConvertedASTForErrorMessage(),
            query_node->formatASTForErrorMessage());
    }

    bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr & child_node)
    {
        if (nodeIsAggregateFunctionOrInGroupByKeys(parent_node))
            return false;

        const auto child_type = child_node->getNodeType();
        return child_type != QueryTreeNodeType::QUERY && child_type != QueryTreeNodeType::UNION;
    }

private:
    /// Returns true if GROUPING function argument is equal to one of GROUP BY keys (default comparison options)
    bool groupingArgumentIsInGroupByKeys(const QueryTreeNodePtr & grouping_argument_node) const
    {
        for (const auto & group_by_key_node : group_by_keys_nodes)
        {
            if (grouping_argument_node->isEqual(*group_by_key_node))
                return true;
        }

        return false;
    }

    /// Returns true if node is aggregate function or is equal to one of GROUP BY keys (aliases are not compared)
    bool nodeIsAggregateFunctionOrInGroupByKeys(const QueryTreeNodePtr & node) const
    {
        auto * function_node = node->as<FunctionNode>();
        if (function_node && function_node->isAggregateFunction())
            return true;

        for (const auto & group_by_key_node : group_by_keys_nodes)
        {
            if (node->isEqual(*group_by_key_node, {.compare_aliases = false}))
                return true;
        }

        return false;
    }

    const QueryTreeNodes & group_by_keys_nodes;
    const QueryTreeNodePtr & query_node;
};
/** Validate usage of aggregate functions, GROUPING function and window functions in query node,
  * and validate that HAVING, ORDER BY and projection use only GROUP BY keys outside of aggregate functions.
  *
  * query_node must be a QueryNode (it is cast with as<QueryNode &>()).
  * The order of checks below defines which violation is reported first.
  * The assertNo* helpers are defined outside of this function; presumably they throw on violation.
  * Throws NOT_IMPLEMENTED for WITH TOTALS, ROLLUP, CUBE or GROUPING SETS without aggregation.
  */
void validateAggregates(const QueryTreeNodePtr & query_node)
{
    const auto & query_node_typed = query_node->as<QueryNode &>();
    auto join_tree_node_type = query_node_typed.getJoinTree()->getNodeType();
    bool join_tree_is_subquery = join_tree_node_type == QueryTreeNodeType::QUERY || join_tree_node_type == QueryTreeNodeType::UNION;

    /// If join tree itself is a subquery or union, it is not checked here
    if (!join_tree_is_subquery)
    {
        assertNoAggregateFunctionNodes(query_node_typed.getJoinTree(), "in JOIN TREE");
        assertNoGroupingFunctionNodes(query_node_typed.getJoinTree(), "in JOIN TREE");
        assertNoWindowFunctionNodes(query_node_typed.getJoinTree(), "in JOIN TREE");
    }

    if (query_node_typed.hasWhere())
    {
        assertNoAggregateFunctionNodes(query_node_typed.getWhere(), "in WHERE");
        assertNoGroupingFunctionNodes(query_node_typed.getWhere(), "in WHERE");
        assertNoWindowFunctionNodes(query_node_typed.getWhere(), "in WHERE");
    }

    if (query_node_typed.hasPrewhere())
    {
        assertNoAggregateFunctionNodes(query_node_typed.getPrewhere(), "in PREWHERE");
        assertNoGroupingFunctionNodes(query_node_typed.getPrewhere(), "in PREWHERE");
        assertNoWindowFunctionNodes(query_node_typed.getPrewhere(), "in PREWHERE");
    }

    /// For HAVING and WINDOW only window functions are asserted here
    if (query_node_typed.hasHaving())
        assertNoWindowFunctionNodes(query_node_typed.getHaving(), "in HAVING");

    if (query_node_typed.hasWindow())
        assertNoWindowFunctionNodes(query_node_typed.getWindowNode(), "in WINDOW");

    QueryTreeNodes aggregate_function_nodes;
    QueryTreeNodes window_function_nodes;

    collectAggregateFunctionNodes(query_node, aggregate_function_nodes);
    collectWindowFunctionNodes(query_node, window_function_nodes);

    if (query_node_typed.hasGroupBy())
    {
        assertNoAggregateFunctionNodes(query_node_typed.getGroupByNode(), "in GROUP BY");
        assertNoGroupingFunctionNodes(query_node_typed.getGroupByNode(), "in GROUP BY");
        assertNoWindowFunctionNodes(query_node_typed.getGroupByNode(), "in GROUP BY");
    }

    /// Arguments of every collected aggregate function must not contain aggregate, GROUPING or window functions
    for (auto & aggregate_function_node : aggregate_function_nodes)
    {
        auto & aggregate_function_node_typed = aggregate_function_node->as<FunctionNode &>();

        assertNoAggregateFunctionNodes(aggregate_function_node_typed.getArgumentsNode(), "inside another aggregate function");
        assertNoGroupingFunctionNodes(aggregate_function_node_typed.getArgumentsNode(), "inside another aggregate function");
        assertNoWindowFunctionNodes(aggregate_function_node_typed.getArgumentsNode(), "inside an aggregate function");
    }

    for (auto & window_function_node : window_function_nodes)
    {
        auto & window_function_node_typed = window_function_node->as<FunctionNode &>();
        assertNoWindowFunctionNodes(window_function_node_typed.getArgumentsNode(), "inside another window function");

        /// NOTE(review): the guard checks hasWindow() of the query node, while the checked node is the
        /// window node of the function. Confirm this is intended and not window_function_node_typed.hasWindow().
        if (query_node_typed.hasWindow())
            assertNoWindowFunctionNodes(window_function_node_typed.getWindowNode(), "inside window definition");
    }

    /// Collect GROUP BY keys into a flat list. Constant keys are skipped.
    /// With GROUPING SETS every GROUP BY node is a list node and its elements are the keys.
    QueryTreeNodes group_by_keys_nodes;
    group_by_keys_nodes.reserve(query_node_typed.getGroupBy().getNodes().size());

    for (const auto & node : query_node_typed.getGroupBy().getNodes())
    {
        if (query_node_typed.isGroupByWithGroupingSets())
        {
            auto & grouping_set_keys = node->as<ListNode &>();
            for (auto & grouping_set_key : grouping_set_keys.getNodes())
            {
                if (grouping_set_key->as<ConstantNode>())
                    continue;

                group_by_keys_nodes.push_back(grouping_set_key);
            }
        }
        else
        {
            if (node->as<ConstantNode>())
                continue;

            group_by_keys_nodes.push_back(node);
        }
    }

    /// Without GROUP BY nodes GROUPING function is not allowed in HAVING, ORDER BY and SELECT
    if (query_node_typed.getGroupBy().getNodes().empty())
    {
        if (query_node_typed.hasHaving())
            assertNoGroupingFunctionNodes(query_node_typed.getHaving(), "in HAVING without GROUP BY");

        if (query_node_typed.hasOrderBy())
            assertNoGroupingFunctionNodes(query_node_typed.getOrderByNode(), "in ORDER BY without GROUP BY");

        assertNoGroupingFunctionNodes(query_node_typed.getProjectionNode(), "in SELECT without GROUP BY");
    }

    /// Query has aggregation if it has GROUP BY nodes or at least one aggregate function was collected
    bool has_aggregation = !query_node_typed.getGroupBy().getNodes().empty() || !aggregate_function_nodes.empty();

    if (has_aggregation)
    {
        ValidateGroupByColumnsVisitor validate_group_by_columns_visitor(group_by_keys_nodes, query_node);

        if (query_node_typed.hasHaving())
            validate_group_by_columns_visitor.visit(query_node_typed.getHaving());

        if (query_node_typed.hasOrderBy())
            validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode());

        validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode());
    }

    bool aggregation_with_rollup_or_cube_or_grouping_sets = query_node_typed.isGroupByWithRollup() ||
        query_node_typed.isGroupByWithCube() ||
        query_node_typed.isGroupByWithGroupingSets();
    if (!has_aggregation && (query_node_typed.isGroupByWithTotals() || aggregation_with_rollup_or_cube_or_grouping_sets))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS, ROLLUP, CUBE or GROUPING SETS are not supported without aggregation");
}
namespace
{
/** Visitor that throws exception with the specified code if a function node
  * with the specified function name is visited.
  * exception_function_name and exception_place_message are used only to build the exception message.
  * Children with QUERY or UNION node type are not visited.
  */
class ValidateFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor<ValidateFunctionNodesVisitor>
{
public:
    explicit ValidateFunctionNodesVisitor(std::string_view function_name_,
        int exception_code_,
        std::string_view exception_function_name_,
        std::string_view exception_place_message_)
        : function_name(function_name_)
        , exception_code(exception_code_)
        , exception_function_name(exception_function_name_)
        , exception_place_message(exception_place_message_)
    {}

    void visitImpl(const QueryTreeNodePtr & node)
    {
        auto * function_node = node->as<FunctionNode>();
        if (!function_node || function_node->getFunctionName() != function_name)
            return;

        throw Exception(exception_code,
            "{} function {} is found {} in query",
            exception_function_name,
            function_node->formatASTForErrorMessage(),
            exception_place_message);
    }

    static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
    {
        const auto child_type = child_node->getNodeType();
        return child_type != QueryTreeNodeType::QUERY && child_type != QueryTreeNodeType::UNION;
    }

private:
    std::string_view function_name;
    int exception_code = 0;
    std::string_view exception_function_name;
    std::string_view exception_place_message;
};
}
void assertNoFunctionNodes(const QueryTreeNodePtr & node,
std::string_view function_name,
int exception_code,
std::string_view exception_function_name,
std::string_view exception_place_message)
{
ValidateFunctionNodesVisitor visitor(function_name, exception_code, exception_function_name, exception_place_message);
visitor.visit(node);
}
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <Analyzer/IQueryTreeNode.h>
namespace DB
{
/** Validate aggregates in query node.
*
* 1. Check that there are no aggregate functions or GROUPING functions in JOIN TREE, WHERE, PREWHERE, or inside other aggregate functions.
* 2. Check that there are no window functions in JOIN TREE, WHERE, PREWHERE, HAVING, WINDOW, inside another aggregate function,
* inside window function arguments, inside window function window definition.
* 3. Check that there are no columns that are not specified in GROUP BY keys in HAVING, ORDER BY, PROJECTION.
* 4. Check that there are no GROUPING functions that have arguments that are not specified in GROUP BY keys in HAVING, ORDER BY,
* PROJECTION.
* 5. Throws exception if there is GROUPING SETS or ROLLUP or CUBE or WITH TOTALS without aggregation.
*/
void validateAggregates(const QueryTreeNodePtr & query_node);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.
*/
void assertNoFunctionNodes(const QueryTreeNodePtr & node,
std::string_view function_name,
int exception_code,
std::string_view exception_function_name,
std::string_view exception_place_message);
}

View File

@ -26,8 +26,15 @@ public:
: assert_no_window_functions_place_message(std::move(assert_no_window_functions_place_message_))
{}
explicit CollectWindowFunctionNodeVisitor(bool only_check_)
: only_check(only_check_)
{}
void visitImpl(const QueryTreeNodePtr & node)
{
if (only_check && has_window_functions)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isWindowFunction())
return;
@ -40,16 +47,28 @@ public:
if (window_function_nodes)
window_function_nodes->push_back(node);
has_window_functions = true;
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) const
{
return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
if (only_check && has_window_functions)
return false;
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
/// Returns true if visitImpl() has encountered at least one window function node so far.
bool hasWindowFunctions() const
{
return has_window_functions;
}
private:
QueryTreeNodes * window_function_nodes = nullptr;
String assert_no_window_functions_place_message;
bool only_check = false;
bool has_window_functions = false;
};
}
@ -63,6 +82,14 @@ QueryTreeNodes collectWindowFunctionNodes(const QueryTreeNodePtr & node)
return window_function_nodes;
}
bool hasWindowFunctionNodes(const QueryTreeNodePtr & node)
{
CollectWindowFunctionNodeVisitor visitor(true /*only_check*/);
visitor.visit(node);
return visitor.hasWindowFunctions();
}
void collectWindowFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result)
{
CollectWindowFunctionNodeVisitor visitor(&result);

View File

@ -15,6 +15,11 @@ QueryTreeNodes collectWindowFunctionNodes(const QueryTreeNodePtr & node);
*/
void collectWindowFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result);
/** Returns true if there are window function nodes in node children, false otherwise.
* Do not visit subqueries.
*/
bool hasWindowFunctionNodes(const QueryTreeNodePtr & node);
/** Assert that there are no window function nodes in node children.
* Do not visit subqueries.
*/

View File

@ -34,14 +34,7 @@ LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool
LocalConnection::~LocalConnection()
{
try
{
state.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
state.reset();
}
bool LocalConnection::hasReadPendingData() const

View File

@ -310,6 +310,13 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
/// Returns a copy of this column (obtained via cloneFinalized()) on which
/// nestedToNullable() has been applied. This column itself is left untouched.
MutableColumnPtr ColumnLowCardinality::cloneNullable() const
{
    auto cloned = cloneFinalized();

    auto & cloned_low_cardinality = assert_cast<ColumnLowCardinality &>(*cloned);
    cloned_low_cardinality.nestedToNullable();

    return cloned;
}
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
@ -830,4 +837,11 @@ void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions)
shared = false;
}
bool isColumnLowCardinalityNullable(const IColumn & column)
{
if (const auto * lc_column = checkAndGetColumn<ColumnLowCardinality>(column))
return lc_column->nestedIsNullable();
return false;
}
}

View File

@ -219,6 +219,7 @@ public:
bool nestedCanBeInsideNullable() const { return dictionary.getColumnUnique().getNestedColumn()->canBeInsideNullable(); }
void nestedToNullable() { dictionary.getColumnUnique().nestedToNullable(); }
void nestedRemoveNullable() { dictionary.getColumnUnique().nestedRemoveNullable(); }
MutableColumnPtr cloneNullable() const;
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
IColumnUnique & getDictionary() { return dictionary.getColumnUnique(); }
@ -360,5 +361,7 @@ private:
void getPermutationImpl(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
};
bool isColumnLowCardinalityNullable(const IColumn & column);
}

View File

@ -8,6 +8,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnLowCardinality.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#if USE_EMBEDDED_COMPILER
@ -792,6 +793,23 @@ ColumnPtr makeNullable(const ColumnPtr & column)
return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
}
/** Makes a column nullable, keeping LowCardinality columns as LowCardinality.
  *
  * - Nullable and LowCardinality(Nullable) columns are returned unchanged.
  * - A constant column is rebuilt around the converted data column, with the same size.
  * - A LowCardinality column is cloned with its nested column made nullable.
  * - Any other column is wrapped in ColumnNullable with an all-zero null map.
  */
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column)
{
    if (isColumnNullable(*column) || isColumnLowCardinalityNullable(*column))
        return column;

    if (isColumnConst(*column))
    {
        /// Recurse instead of calling makeNullable(), so that a constant LowCardinality column
        /// is converted the same way as a non-constant one (see the lowCardinality() branch below)
        /// rather than being wrapped in ColumnNullable.
        const auto & data_column = assert_cast<const ColumnConst &>(*column).getDataColumnPtr();
        return ColumnConst::create(makeNullableOrLowCardinalityNullable(data_column), column->size());
    }

    if (column->lowCardinality())
        return assert_cast<const ColumnLowCardinality &>(*column).cloneNullable();

    return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
}
ColumnPtr makeNullableSafe(const ColumnPtr & column)
{
if (isColumnNullable(*column))

View File

@ -220,5 +220,6 @@ private:
ColumnPtr makeNullable(const ColumnPtr & column);
ColumnPtr makeNullableSafe(const ColumnPtr & column);
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column);
}

View File

@ -182,7 +182,7 @@ TEST(WeakHash32, ColumnVectorI32)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int32_t i = -32768; i < 32768; ++i)
data.push_back(i << 16); //-V610
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
@ -216,7 +216,7 @@ TEST(WeakHash32, ColumnVectorI64)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int64_t i = -32768; i < 32768; ++i)
data.push_back(i << 32); //-V610
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
@ -258,7 +258,7 @@ TEST(WeakHash32, ColumnVectorI128)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int64_t i = -32768; i < 32768; ++i)
data.push_back(i << 32); //-V610
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
@ -275,7 +275,7 @@ TEST(WeakHash32, ColumnDecimal32)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int32_t i = -32768; i < 32768; ++i)
data.push_back(i << 16); //-V610
data.push_back(i << 16);
}
WeakHash32 hash(col->size());
@ -292,7 +292,7 @@ TEST(WeakHash32, ColumnDecimal64)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int64_t i = -32768; i < 32768; ++i)
data.push_back(i << 32); //-V610
data.push_back(i << 32);
}
WeakHash32 hash(col->size());
@ -309,7 +309,7 @@ TEST(WeakHash32, ColumnDecimal128)
for (int idx [[maybe_unused]] : {1, 2})
{
for (int64_t i = -32768; i < 32768; ++i)
data.push_back(i << 32); //-V610
data.push_back(i << 32);
}
WeakHash32 hash(col->size());

View File

@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
static ALWAYS_INLINE const DateLUTImpl & instance()
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);

View File

@ -1083,7 +1083,7 @@ bool Dwarf::findLocation(
// file+line of the non-inlined outer function making the call.
// locationInfo.name is already set by the caller by looking up the
// non-inlined function @address belongs to.
info.has_file_and_line = true; //-V1048
info.has_file_and_line = true;
info.file = call_locations[0].file;
info.line = call_locations[0].line;
@ -1783,7 +1783,7 @@ void Dwarf::LineNumberVM::init()
lineRange_ = read<uint8_t>(header);
opcodeBase_ = read<uint8_t>(header);
SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
standardOpcodeLengths_ = reinterpret_cast<const uint8_t *>(header.data()); //-V506
standardOpcodeLengths_ = reinterpret_cast<const uint8_t *>(header.data());
header.remove_prefix(opcodeBase_ - 1);
if (version_ <= 4)

View File

@ -44,7 +44,7 @@ struct ClearableHashTableCell : public BaseCell
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
static constexpr bool need_zero_value_storage = false;
ClearableHashTableCell() {} //-V730 /// NOLINT
ClearableHashTableCell() {} /// NOLINT
ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {}
};
@ -68,7 +68,7 @@ struct ClearableHashTableCell<StringRef, StringRefBaseCell> : public StringRefBa
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
static constexpr bool need_zero_value_storage = true;
ClearableHashTableCell() { } //-V730 /// NOLINT
ClearableHashTableCell() { } /// NOLINT
ClearableHashTableCell(const StringRef & key_, const State & state) : StringRefBaseCell(key_, state), version(state.version) { }
};

View File

@ -13,7 +13,7 @@ struct FixedClearableHashTableCell
using mapped_type = VoidMapped;
UInt32 version;
FixedClearableHashTableCell() {} //-V730 /// NOLINT
FixedClearableHashTableCell() {} /// NOLINT
FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}
const VoidKey getKey() const { return {}; } /// NOLINT

View File

@ -16,7 +16,7 @@ struct FixedHashMapCell
bool full;
Mapped mapped;
FixedHashMapCell() {} //-V730 /// NOLINT
FixedHashMapCell() {} /// NOLINT
FixedHashMapCell(const Key &, const State &) : full(true) {}
FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
@ -31,7 +31,7 @@ struct FixedHashMapCell
/// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
struct CellExt
{
CellExt() {} //-V730 /// NOLINT
CellExt() {} /// NOLINT
CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
void update(Key && key_, const FixedHashMapCell * ptr_)
{
@ -76,7 +76,7 @@ struct FixedHashMapImplicitZeroCell
/// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
struct CellExt
{
CellExt() {} //-V730 /// NOLINT
CellExt() {} /// NOLINT
CellExt(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapImplicitZeroCell *>(ptr_)) {}
void update(Key && key_, const FixedHashMapImplicitZeroCell * ptr_)
{

View File

@ -19,7 +19,7 @@ struct FixedHashTableCell
using mapped_type = VoidMapped;
bool full;
FixedHashTableCell() {} //-V730 /// NOLINT
FixedHashTableCell() {} /// NOLINT
FixedHashTableCell(const Key &, const State &) : full(true) {}
const VoidKey getKey() const { return {}; } /// NOLINT

View File

@ -121,8 +121,8 @@ struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState>
size_t saved_hash;
HashSetCellWithSavedHash() : Base() {} //-V730
HashSetCellWithSavedHash(const Key & key_, const typename Base::State & state) : Base(key_, state) {} //-V730
HashSetCellWithSavedHash() : Base() {}
HashSetCellWithSavedHash(const Key & key_, const typename Base::State & state) : Base(key_, state) {}
bool keyEquals(const Key & key_) const { return bitEquals(this->key, key_); }
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && bitEquals(this->key, key_); }

View File

@ -369,7 +369,7 @@ template <bool need_zero_value_storage, typename Cell>
struct ZeroValueStorage;
template <typename Cell>
struct ZeroValueStorage<true, Cell> //-V730
struct ZeroValueStorage<true, Cell>
{
private:
bool has_zero = false;

View File

@ -92,7 +92,7 @@ struct StringHashTableHash
};
template <typename Cell>
struct StringHashTableEmpty //-V730
struct StringHashTableEmpty
{
using Self = StringHashTableEmpty;

View File

@ -95,7 +95,7 @@ class JSONMap : public IItem
};
public:
void add(std::string key, ItemPtr value) { values.emplace_back(Pair{.key = std::move(key), .value = std::move(value)}); } //-V1030
void add(std::string key, ItemPtr value) { values.emplace_back(Pair{.key = std::move(key), .value = std::move(value)}); }
void add(std::string key, std::string value) { add(std::move(key), std::make_unique<JSONString>(std::move(value))); }
void add(std::string key, const char * value) { add(std::move(key), std::make_unique<JSONString>(value)); }
void add(std::string key, std::string_view value) { add(std::move(key), std::make_unique<JSONString>(value)); }

View File

@ -320,8 +320,8 @@ TracingContextHolder::TracingContextHolder(
while (_parent_trace_context.trace_id == UUID())
{
// Make sure the random generated trace_id is not 0 which is an invalid id.
_parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng(); //-V656
_parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng(); //-V656
_parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng();
_parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng();
}
_parent_trace_context.span_id = 0;
}

View File

@ -381,7 +381,7 @@ void PoolWithFailoverBase<TNestedPool>::updateErrorCounts(PoolWithFailoverBase<T
{
time_t current_time = time(nullptr);
if (last_decrease_time) //-V1051
if (last_decrease_time)
{
time_t delta = current_time - last_decrease_time;

View File

@ -86,7 +86,7 @@ public:
struct Counter
{
Counter() = default; //-V730
Counter() = default;
explicit Counter(const TKey & k, UInt64 c = 0, UInt64 e = 0, size_t h = 0)
: key(k), slot(0), hash(h), count(c), error(e) {}

View File

@ -307,7 +307,7 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE(
* class TestClass
* {
* public:
* MULTITARGET_FUNCTION_AVX2_SSE42(
* MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
* MULTITARGET_FUNCTION_HEADER(int), testFunctionImpl, MULTITARGET_FUNCTION_BODY((int value)
* {
* return value;
@ -315,7 +315,15 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE(
* )
*
* void testFunction(int value) {
* if (isArchSupported(TargetArch::AVX2))
* if (isArchSupported(TargetArch::AVX512BW))
* {
* testFunctionImplAVX512BW(value);
* }
* else if (isArchSupported(TargetArch::AVX512F))
* {
* testFunctionImplAVX512F(value);
* }
* else if (isArchSupported(TargetArch::AVX2))
* {
* testFunctionImplAVX2(value);
* }
@ -341,7 +349,19 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE(
#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
#define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
FUNCTION_HEADER \
\
AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX512BW \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX512F \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
@ -362,7 +382,7 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE(
#else
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
#define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
FUNCTION_HEADER \
\
name \

View File

@ -204,7 +204,7 @@ public:
void updateCounters(ProfileEvents::Counters & profile_events);
private:
::taskstats stats; //-V730_NOINIT
::taskstats stats;
std::function<::taskstats()> stats_getter;
explicit TasksStatsCounters(UInt64 tid, MetricsProvider provider);

View File

@ -193,8 +193,8 @@ namespace VolnitskyTraits
chars.c1 = seq_l[seq_ngram_offset + 1];
putNGramBase(n, offset);
chars.c0 = seq_r[seq_ngram_offset]; //-V519
chars.c1 = seq_r[seq_ngram_offset + 1]; //-V519
chars.c0 = seq_r[seq_ngram_offset];
chars.c1 = seq_r[seq_ngram_offset + 1];
putNGramBase(n, offset);
}
@ -317,7 +317,7 @@ namespace VolnitskyTraits
{
/// ngram for Ul
chars.c0 = c0u;
chars.c1 = c1l; //-V1048
chars.c1 = c1l;
putNGramBase(n, offset);
}

View File

@ -212,7 +212,7 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
else
{
TestKeeper::Node created_node;
created_node.seq_num = 0; //-V1048
created_node.seq_num = 0;
created_node.stat.czxid = zxid;
created_node.stat.mzxid = zxid;
created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
@ -286,7 +286,7 @@ std::pair<ResponsePtr, Undo> TestKeeperRemoveRequest::process(TestKeeper::Contai
auto & parent = container.at(parentPath(path));
--parent.stat.numChildren;
++parent.stat.cversion;
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
undo = [prev_node, &container, path = path]
{
@ -308,7 +308,7 @@ std::pair<ResponsePtr, Undo> TestKeeperExistsRequest::process(TestKeeper::Contai
if (it != container.end())
{
response.stat = it->second.stat;
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
}
else
{
@ -331,7 +331,7 @@ std::pair<ResponsePtr, Undo> TestKeeperGetRequest::process(TestKeeper::Container
{
response.stat = it->second.stat;
response.data = it->second.data;
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
}
return { std::make_shared<GetResponse>(response), {} };
@ -358,7 +358,7 @@ std::pair<ResponsePtr, Undo> TestKeeperSetRequest::process(TestKeeper::Container
it->second.data = data;
++container.at(parentPath(path)).stat.cversion;
response.stat = it->second.stat;
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
undo = [prev_node, &container, path = path]
{
@ -412,7 +412,7 @@ std::pair<ResponsePtr, Undo> TestKeeperListRequest::process(TestKeeper::Containe
}
response.stat = it->second.stat;
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
}
return { std::make_shared<ListResponse>(response), {} };
@ -432,7 +432,7 @@ std::pair<ResponsePtr, Undo> TestKeeperCheckRequest::process(TestKeeper::Contain
}
else
{
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
}
return { std::make_shared<CheckResponse>(response), {} };
@ -455,7 +455,7 @@ std::pair<ResponsePtr, Undo> TestKeeperMultiRequest::process(TestKeeper::Contain
try
{
auto request_it = requests.begin();
response.error = Error::ZOK; //-V1048
response.error = Error::ZOK;
while (request_it != requests.end())
{
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(**request_it);

View File

@ -826,7 +826,7 @@ void ZooKeeper::receiveEvent()
if (length != actual_length)
throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length);
logOperationIfNeeded(request_info.request, response, /* finalize= */ false, elapsed_ms); //-V614
logOperationIfNeeded(request_info.request, response, /* finalize= */ false, elapsed_ms);
}
catch (...)
{

View File

@ -6,10 +6,16 @@
# include <fstream>
#endif
#include <boost/algorithm/string/trim.hpp>
#include <thread>
#include <set>
namespace
{
#if defined(OS_LINUX)
static int32_t readFrom(const char * filename, int default_value)
int32_t readFrom(const char * filename, int default_value)
{
std::ifstream infile(filename);
if (!infile.is_open())
@ -22,7 +28,7 @@ static int32_t readFrom(const char * filename, int default_value)
}
/// Try to look at cgroups limit if it is available.
static uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
{
uint32_t quota_count = default_cpu_count;
/// Return the number of milliseconds per period process is guaranteed to run.
@ -36,20 +42,73 @@ static uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
}
#endif
static unsigned getNumberOfPhysicalCPUCoresImpl()
/// Returns the number of physical cores, unlike std::thread::hardware_concurrency() which returns the logical core count. With 2-way SMT
/// (HyperThreading) enabled, physical_concurrency() returns half of std::thread::hardware_concurrency(), otherwise it returns the same value.
#if defined(__x86_64__) && defined(OS_LINUX)
/// Counts the distinct (physical id, core id) pairs listed in /proc/cpuinfo.
/// Falls back to std::thread::hardware_concurrency() when the file cannot be opened,
/// when no such pair is found, or when parsing throws.
unsigned physical_concurrency()
try
{
    /// The CPUID instruction isn't reliable across different vendors and CPU models. The best option to get the physical core count is
    /// to parse /proc/cpuinfo. boost::thread::physical_concurrency() does the same, so use their implementation.
    ///
    /// See https://doc.callmematthi.eu/static/webArticles/Understanding%20Linux%20_proc_cpuinfo.pdf
    std::ifstream proc_cpuinfo("/proc/cpuinfo");

    if (!proc_cpuinfo.is_open())
        /// In obscure cases (chroot) /proc can be unmounted
        return std::thread::hardware_concurrency();

    using CoreEntry = std::pair<size_t, size_t>; /// physical id, core id
    using CoreEntrySet = std::set<CoreEntry>;

    CoreEntrySet core_entries;
    CoreEntry cur_core_entry;
    std::string line;

    while (std::getline(proc_cpuinfo, line))
    {
        /// Every relevant line has the form "<key> : <value>"; anything else is skipped.
        const size_t pos = line.find(':');
        if (pos == std::string::npos)
            continue;

        const std::string key = line.substr(0, pos);
        const std::string val = line.substr(pos + 1);

        if (key.find("physical id") != std::string::npos)
        {
            /// Remember the package id; the entry is recorded once the matching "core id" line is seen.
            cur_core_entry.first = std::stoul(val);
            continue;
        }

        if (key.find("core id") != std::string::npos)
        {
            cur_core_entry.second = std::stoul(val);
            /// SMT siblings report the same (physical id, core id) pair, so the set de-duplicates them.
            core_entries.insert(cur_core_entry);
            continue;
        }
    }

    return core_entries.empty() ? /*unexpected format*/ std::thread::hardware_concurrency() : static_cast<unsigned>(core_entries.size());
}
catch (...)
{
    return std::thread::hardware_concurrency(); /// parsing error
}
#endif
unsigned getNumberOfPhysicalCPUCoresImpl()
{
unsigned cpu_count = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)
/// Most x86_64 CPUs have 2-way SMT (Hyper-Threading).
/// Aarch64 and RISC-V don't have SMT so far.
/// POWER has SMT and it can be multiple way (like 8-way), but we don't know how ClickHouse really behaves, so use all of them.
/// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them.
#if defined(__x86_64__)
/// Let's limit ourself to the number of physical cores.
/// But if the number of logical cores is small - maybe it is a small machine
/// or very limited cloud instance and it is reasonable to use all the cores.
#if defined(__x86_64__) && defined(OS_LINUX)
/// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench). On such machines, we limit ourselves to the physical cores.
/// Few cores indicate it is a small machine, runs in a VM or is a limited cloud instance --> it is reasonable to use all the cores.
if (cpu_count >= 32)
cpu_count /= 2;
cpu_count = physical_concurrency();
#endif
#if defined(OS_LINUX)
@ -59,6 +118,8 @@ static unsigned getNumberOfPhysicalCPUCoresImpl()
return cpu_count;
}
}
unsigned getNumberOfPhysicalCPUCores()
{
/// Calculate once.

View File

@ -32,17 +32,16 @@ static void dummyFunctionForInterposing() __attribute__((used));
static void dummyFunctionForInterposing()
{
void* dummy;
/// Suppression for PVS-Studio and clang-tidy.
free(nullptr); // -V575 NOLINT
ignore(malloc(0)); // -V575 NOLINT
ignore(calloc(0, 0)); // -V575 NOLINT
ignore(realloc(nullptr, 0)); // -V575 NOLINT
ignore(posix_memalign(&dummy, 0, 0)); // -V575 NOLINT
ignore(aligned_alloc(1, 0)); // -V575 NOLINT
ignore(valloc(0)); // -V575 NOLINT
ignore(memalign(0, 0)); // -V575 NOLINT
free(nullptr); // NOLINT
ignore(malloc(0)); // NOLINT
ignore(calloc(0, 0)); // NOLINT
ignore(realloc(nullptr, 0)); // NOLINT
ignore(posix_memalign(&dummy, 0, 0)); // NOLINT
ignore(aligned_alloc(1, 0)); // NOLINT
ignore(valloc(0)); // NOLINT
ignore(memalign(0, 0)); // NOLINT
#if !defined(USE_MUSL)
ignore(pvalloc(0)); // -V575 NOLINT
ignore(pvalloc(0)); // NOLINT
#endif
}
#endif

View File

@ -25,7 +25,7 @@ TEST(ThreadPool, GlobalFull1)
std::atomic<size_t> counter = 0;
static constexpr size_t num_jobs = capacity + 1;
auto func = [&] { ++counter; while (counter != num_jobs) {} }; //-V776
auto func = [&] { ++counter; while (counter != num_jobs) {} };
ThreadPool pool(num_jobs);
@ -63,7 +63,7 @@ TEST(ThreadPool, GlobalFull2)
global_pool.wait();
std::atomic<size_t> counter = 0;
auto func = [&] { ++counter; while (counter != capacity + 1) {} }; //-V776
auto func = [&] { ++counter; while (counter != capacity + 1) {} };
ThreadPool pool(capacity, 0, capacity);
for (size_t i = 0; i < capacity; ++i)

View File

@ -1130,7 +1130,7 @@ template <typename ValueType>
auto DDCompatibilityTestSequence()
{
// Generates sequences with double delta in given range.
auto dd_generator = [prev_delta = static_cast<Int64>(0), prev = static_cast<Int64>(0)](auto dd) mutable //-V788
auto dd_generator = [prev_delta = static_cast<Int64>(0), prev = static_cast<Int64>(0)](auto dd) mutable
{
const auto curr = dd + prev + prev_delta;
prev = curr;

View File

@ -465,7 +465,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
if (request == nullptr)
return true;
for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests) // -V522
for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests)
if (subrequest == nullptr)
return true;
return false;

View File

@ -1018,7 +1018,6 @@ struct DefineAliases
\
template class BaseSettings<SETTINGS_TRAITS_NAME>;
//-V:IMPLEMENT_SETTINGS:501
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTINGS_TRAITS_(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
res.field_infos.emplace_back( \

Some files were not shown because too many files have changed in this diff Show More