Merge remote-tracking branch 'origin/master' into tmp

Alexander Kuzmenkov 2021-01-14 18:50:34 +03:00
commit 828dea83cb
82 changed files with 1314 additions and 390 deletions

View File

@ -112,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)
/** Signal handler for HUP / USR1 */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
@ -125,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno;   /// We must restore the previous value of errno in the signal handler.
char buf[signal_pipe_buf_size];

View File

@ -4,5 +4,5 @@ alpine-root/install/*
# docs (looks useless)
alpine-root/usr/share/doc/*
# packages, etc. (used by prepare.sh)
alpine-root/tgz-packages/*
# packages, etc. (used by alpine-build.sh)
tgz-packages/*

View File

@ -1 +1,2 @@
alpine-root/*
tgz-packages/*

View File

@ -16,7 +16,7 @@ RUN addgroup clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod 775 /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec
&& apk add --no-cache su-exec bash
EXPOSE 9000 8123 9009

View File

@ -4,6 +4,7 @@ set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"
# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
# clean up the root from old runs
# clean up the root from old runs, it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done
# unpack tars
@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/lib64"
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,152 +0,0 @@
#!/bin/sh
#set -x
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
DO_CHOWN=0
fi
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
# busybox has setuidgid & chpst built in
gosu="su-exec $USER:$GROUP"
else
USER="$(id -u)"
GROUP="$(id -g)"
gosu=""
DO_CHOWN=0
fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
for dir in "$DATA_DIR" \
"$ERROR_LOG_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH"
do
# skip if the variable is empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directory permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
exit 1
fi
done
# if clickhouse user is defined - create it (user "default" already exists out of the box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
<default remove="remove">
</default>
<${CLICKHOUSE_USER}>
<profile>default</profile>
<networks>
<ip>::/0</ip>
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
</${CLICKHOUSE_USER}>
</users>
</yandex>
EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
tries=6
while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ -n "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD "
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
fi
# if no args are passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi
# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image
exec "$@"

docker/server/entrypoint.sh Normal file → Executable file
View File

@ -1,7 +1,10 @@
#!/bin/bash
set -eo pipefail
shopt -s nullglob
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
DO_CHOWN=0
fi
@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ x"$UID" == x0 ]; then
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
if command -v gosu &> /dev/null; then
gosu="gosu $USER:$GROUP"
elif command -v su-exec &> /dev/null; then
gosu="su-exec $USER:$GROUP"
else
echo "No gosu/su-exec detected!"
exit 1
fi
else
USER="$(id -u)"
GROUP="$(id -g)"
@ -23,18 +33,23 @@ fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
echo "Configuration file '$dir' isn't readable by user with id '$USER'"
exit 1
fi
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
LOG_DIR="$(dirname $LOG_PATH || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
LOG_DIR="$(dirname "$LOG_PATH" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -58,8 +73,8 @@ do
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directory permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
exit 1
fi
done
@ -90,21 +105,22 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
# will try to ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
@ -140,7 +157,7 @@ fi
# if no args are passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi
# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image
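As a rough illustration of how the variables consumed by this entrypoint fit together (the user, password, and database names below are made up, not from this commit):

```bash
# Start a server; the entrypoint creates a non-default user and database,
# then runs any *.sh / *.sql / *.sql.gz files from the init directory once
# the HTTP ping on http_port succeeds.
docker run -d \
    -e CLICKHOUSE_USER=app \
    -e CLICKHOUSE_PASSWORD=secret \
    -e CLICKHOUSE_DB=analytics \
    -e CLICKHOUSE_INIT_TIMEOUT=30 \
    -v "$PWD/initdb:/docker-entrypoint-initdb.d" \
    yandex/clickhouse-server
```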

View File

@ -329,6 +329,7 @@ function run_tests
# nc - command not found
01601_proxy_protocol
01622_defaults_for_url_engine
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
- For a MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in the case of MySQL 8 you need to disable meta-information caching in MySQL with `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
- The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).
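A configuration sketch of the `<invalidate_query>` field described above, for an ODBC source; the table and column names are illustrative:

```xml
<source>
    <odbc>
        <!-- ... connection settings ... -->
        <!-- The dictionary is reloaded only when this query returns a new value. -->
        <invalidate_query>SELECT max(update_time) FROM dictionary_source</invalidate_query>
    </odbc>
</source>
```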

View File

@ -13,9 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with a column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
For example, consider the table:
@ -23,9 +21,8 @@ For example, consider the table:
SHOW CREATE insert_select_testtable;
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
```text
CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
SETTINGS index_granularity = 8192
```
``` sql

View File

@ -0,0 +1,21 @@
---
toc_title: ALL
---
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception will be thrown.
`ALL` can also be specified inside an aggregate function with the same effect (no-op), for instance:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
which is equivalent to
```sql
SELECT sum(number) FROM numbers(10);
```

View File

@ -133,7 +133,7 @@ ClickHouse is strongly typed, so there is no
## Aggregate Functions {#aggregate-functions}
Aggregate functions are stateful functions. They accumulate passed values in some state and allow you to get results from that state. They are managed through the `IAggregateFunction` interface. States can be either quite simple (the state for `AggregateFunctionCount` is just one person `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table and a `HyperLogLog` probabilistic data structure).
Aggregate functions are stateful functions. They accumulate passed values in some state and allow you to get results from that state. They are managed through the `IAggregateFunction` interface. States can be either quite simple (the state for `AggregateFunctionCount` is just a single variable of type `UInt64`) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table and a `HyperLogLog` probabilistic data structure).
States are allocated in an `Arena` (a memory pool) to handle multiple states while executing a high-cardinality `GROUP BY` query (one with a large number of unique keys). States can have a non-trivial constructor and destructor: for example, complex aggregate states can allocate additional memory themselves. Therefore, creating and destroying states, correctly passing ownership, and the order of destruction deserve extra attention.

View File

@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
When updating dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md):
> - For a text file, the modification time is checked. If it differs from the previously recorded time, the dictionary is updated.
> - For MyISAM tables, the modification time is checked with a `SHOW TABLE STATUS` query.
> - For a MySQL source, the modification time is checked with a `SHOW TABLE STATUS` query (for MySQL 8 you need to disable meta-information caching in MySQL with `set global information_schema_stats_expiry=0`).
> - Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/steps must be met:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/steps must be met:
> - The dictionary table must have a field that is guaranteed to change when the data in the source is updated.
> - The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field of the [source](external-dicts-dict-sources.md) settings.

View File

@ -13,9 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the following syntax: `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)`.
Instead of listing all the required columns, you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with an [asterisk](../../sql-reference/statements/select/index.md#asterisk) and/or modifiers such as `APPLY`, `EXCEPT`, `REPLACE`.
As an example, consider the table:

View File

@ -29,7 +29,7 @@ SELECT 1 - 0.9
- When floating-point numbers are parsed from a line of text, the result may not be the nearest machine-representable value.
## and Inf {#data_type-float-nan-inf}
## NaN and Inf {#data_type-float-nan-inf}
Compared with standard SQL, ClickHouse supports the following categories of floating-point numbers:

View File

@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
@ -121,7 +120,6 @@ public:
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
@ -129,7 +127,7 @@ public:
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }
void insert(Data & a, const T & v, Arena * arena) const
{

View File

@ -419,7 +419,7 @@ public:
if (isSmall())
return small.find(x) != small.end();
else
return rb->contains(x);
return rb->contains(static_cast<Value>(x));
}
/**
@ -613,7 +613,7 @@ public:
/**
* Replace value
*/
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{
if (isSmall())
toLarge();
@ -622,9 +622,9 @@ public:
{
if (from_vals[i] == to_vals[i])
continue;
bool changed = rb->removeChecked(from_vals[i]);
bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
if (changed)
rb->add(to_vals[i]);
rb->add(static_cast<Value>(to_vals[i]));
}
}
};

View File

@ -56,7 +56,7 @@ public:
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
return std::make_shared<DataTypeArray>(this->argument_types[0]);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override

View File

@ -12,6 +12,10 @@
#include <random>
#include <cstdlib>
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
thread_local bool _memory_tracker_always_throw_logical_error_on_allocation = false;
#endif
namespace
{
@ -165,6 +169,14 @@ void MemoryTracker::alloc(Int64 size)
}
}
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(_memory_tracker_always_throw_logical_error_on_allocation))
{
_memory_tracker_always_throw_logical_error_on_allocation = false;
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
}
#endif
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
{

View File

@ -5,6 +5,28 @@
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
#endif
/// The DENY_ALLOCATIONS_IN_SCOPE macro makes MemoryTracker throw LOGICAL_ERROR on any allocation attempt
/// until the end of the scope. It's useful for ensuring that no allocations happen in signal handlers and
/// outside of the try/catch block of thread functions. ALLOW_ALLOCATIONS_IN_SCOPE cancels the effect of
/// DENY_ALLOCATIONS_IN_SCOPE in the inner scope. In Release builds these macros do nothing.
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
#include <ext/scope_guard.h>
extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation;
#define ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) \
bool _allocations_flag_prev_val##n = _memory_tracker_always_throw_logical_error_on_allocation; \
_memory_tracker_always_throw_logical_error_on_allocation = val; \
SCOPE_EXIT({ _memory_tracker_always_throw_logical_error_on_allocation = _allocations_flag_prev_val##n; })
#define ALLOCATIONS_IN_SCOPE_IMPL(n, val) ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val)
#define DENY_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, true)
#define ALLOW_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, false)
#else
#define DENY_ALLOCATIONS_IN_SCOPE static_assert(true)
#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true)
#endif
/** Tracks memory consumption.
* It throws an exception if the amount of consumed memory becomes greater than a certain limit.
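A minimal usage sketch of the two macros (not taken from this commit); in release builds both expand to `static_assert(true)` and cost nothing:

```cpp
#include <Common/MemoryTracker.h>

void handleSignal()
{
    DENY_ALLOCATIONS_IN_SCOPE;       /// any allocation below throws LOGICAL_ERROR in debug builds
    /// ... async-signal-safe work only ...
    {
        ALLOW_ALLOCATIONS_IN_SCOPE;  /// re-enable allocations for this inner scope only
        /// ... code that may legitimately allocate ...
    }
    /// allocations are denied again here until the end of the function
}
```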

View File

@ -181,6 +181,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::Real, sig, info, context);
}
@ -190,6 +191,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)
void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::CPU, sig, info, context);
}

View File

@ -197,6 +197,7 @@ static void injection(
void ThreadFuzzer::signalHandler(int)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno;
auto & fuzzer = ThreadFuzzer::instance();

View File

@ -208,6 +208,7 @@ size_t ThreadPoolImpl<Thread>::active() const
template <typename Thread>
void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
{
DENY_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);
@ -223,7 +224,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
if (!jobs.empty())
{
job = std::move(jobs.top().job);
/// std::priority_queue does not provide an interface for getting a non-const reference to an element,
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
jobs.pop();
}
else
@ -237,6 +240,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
{
try
{
ALLOW_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_active_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);
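The `const_cast` note above can be illustrated in isolation. A self-contained sketch (not from this file) of moving an element out of `std::priority_queue` just before popping it:

```cpp
#include <queue>
#include <string>

int main()
{
    std::priority_queue<std::string> jobs;
    jobs.push("job");
    // top() only returns a const reference, so a plain std::move would copy.
    // The underlying element is not actually const, so casting away constness
    // and moving it out right before pop() avoids that copy.
    std::string job = std::move(const_cast<std::string &>(jobs.top()));
    jobs.pop();
    return 0;
}
```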

View File

@ -371,8 +371,9 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, which helps with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
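To see the settings touched by this hunk in action, they can be toggled per session; a sketch (the new `optimize_respect_aliases` defaults to true, while `optimize_move_functions_out_of_any` now defaults to false):

```sql
-- Opt back in to moving functions out of any()/anyLast():
SET optimize_move_functions_out_of_any = 1;
-- Disable the new alias rewriting if it interferes with a query:
SET optimize_respect_aliases = 0;
```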

View File

@ -6,6 +6,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
@ -14,6 +15,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
// TODO include this last because of a broken roaring header. See the comment
// inside.
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>
@ -282,18 +284,16 @@ public:
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))
for (size_t i = 1; i < 3; ++i)
{
WhichDataType which(arguments[i].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
{
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
"The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but one of them has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type2))
throw Exception(
"Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
return arguments[0];
}
@ -327,13 +327,23 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1, * container2;
const PaddedPODArray<UInt64> * container1, * container2;
ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
if (i > 0)
{
column_holder[i - 1] = castColumn(arguments[i], std::make_shared<DataTypeUInt64>());
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
if (is_column_const[0])
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());
else
@ -341,13 +351,13 @@ private:
container0 = &col_agg_func->getData();
if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();
if (is_column_const[2])
container2 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
container2 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
else
container2 = &typeid_cast<const ColumnUInt32*>(column_ptrs[2])->getData();
container2 = &typeid_cast<const ColumnUInt64*>(column_ptrs[2])->getData();
auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);
@ -357,8 +367,8 @@ private:
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T>*>(data_ptr_0);
const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bitmap_data_2
@ -374,7 +384,7 @@ struct BitmapSubsetInRangeImpl
public:
static constexpr auto name = "bitmapSubsetInRange";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
}
@ -385,7 +395,7 @@ struct BitmapSubsetLimitImpl
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
}
@ -418,14 +428,14 @@ public:
for (size_t i = 0; i < 2; ++i)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[i + 1].get());
String msg(i == 0 ? "Second" : "Third");
msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + ".";
String msg = "The second and third arguments for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but one of them has type " + arguments[i + 1]->getName() + ".";
if (!array_type)
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto nested_type = array_type->getNestedType();
WhichDataType which(nested_type);
if (!which.isUInt32())
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
return arguments[0];
@ -461,13 +471,26 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const ColumnArray * array;
const ColumnArray * array1;
const ColumnArray * array2;
ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
if (i > 0)
{
auto array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
column_holder[i - 1] = castColumn(arguments[i], array_type);
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
if (is_column_const[0])
{
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());
@ -479,21 +502,20 @@ private:
container0 = &col_agg_func->getData();
if (is_column_const[1])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
array1 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
else
{
array = typeid_cast<const ColumnArray *>(arguments[1].column.get());
}
const ColumnArray::Offsets & from_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & from_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
array1 = typeid_cast<const ColumnArray *>(column_ptrs[1]);
const ColumnArray::Offsets & from_offsets = array1->getOffsets();
const ColumnVector<UInt64>::Container & from_container = typeid_cast<const ColumnVector<UInt64> *>(&array1->getData())->getData();
if (is_column_const[2])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
array2 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
else
array = typeid_cast<const ColumnArray *>(arguments[2].column.get());
array2 = typeid_cast<const ColumnArray *>(column_ptrs[2]);
const ColumnArray::Offsets & to_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & to_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
const ColumnArray::Offsets & to_offsets = array2->getOffsets();
const ColumnVector<UInt64>::Container & to_container = typeid_cast<const ColumnVector<UInt64> *>(&array2->getData())->getData();
auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);
@ -526,6 +548,7 @@ private:
to_start = i == 0 ? 0 : to_offsets[i - 1];
to_end = to_offsets[i];
}
if (from_end - from_start != to_end - to_start)
throw Exception("From array size and to array size mismatch", ErrorCodes::LOGICAL_ERROR);
@ -724,10 +747,11 @@ public:
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))
WhichDataType which(arguments[1].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<UInt8>>();
@ -765,27 +789,32 @@ private:
{
const IColumn * column_ptrs[2];
bool is_column_const[2];
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1;
for (size_t i = 0; i < 2; ++i)
{
column_ptrs[i] = arguments[i].column.get();
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt64> * container1;
column_ptrs[0] = arguments[0].column.get();
is_column_const[0] = isColumnConst(*column_ptrs[0]);
if (is_column_const[0])
container0 = &typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get())->getData();
else
container0 = &typeid_cast<const ColumnAggregateFunction*>(column_ptrs[0])->getData();
// we can always cast the second column to ColumnUInt64
auto uint64_column = castColumn(arguments[1], std::make_shared<DataTypeUInt64>());
column_ptrs[1] = uint64_column.get();
is_column_const[1] = isColumnConst(*column_ptrs[1]);
if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const UInt32 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(data_ptr_0);
vec_to[i] = bitmap_data_0.rbs.rb_contains(data1);

View File

@ -30,6 +30,10 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
/// We should never return a LowCardinality result, because we declare that the result is always constant zero.
/// (in getResultIfAlwaysReturnsConstantAndHasArguments)
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
String getName() const override
{
return name;

View File

@ -62,7 +62,11 @@ template <typename T> WriteBuffer & operator<< (QuoteManipWriteBuffer buf,
template <typename T> WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const T & x) { writeDoubleQuoted(x, buf.get()); return buf; }
template <typename T> WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const T & x) { writeBinary(x, buf.get()); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeAnyEscapedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const std::string_view & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const StringRef & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeEscapedString(x, strlen(x), buf); return buf; }
inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'"'>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const char * x) { writeStringBinary(x, buf.get()); return buf; }

View File

@ -483,6 +483,10 @@ inline void writeEscapedString(const StringRef & ref, WriteBuffer & buf)
writeEscapedString(ref.data, ref.size, buf);
}
inline void writeEscapedString(const std::string_view & ref, WriteBuffer & buf)
{
writeEscapedString(ref.data(), ref.size(), buf);
}
template <char quote_character>
void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & buf)
@ -512,17 +516,31 @@ inline void writeQuotedString(const String & s, WriteBuffer & buf)
writeAnyQuotedString<'\''>(s, buf);
}
inline void writeQuotedString(const StringRef & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref, buf);
}
inline void writeQuotedString(const std::string_view & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref.data(), ref.data() + ref.size(), buf);
}
inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}
inline void writeDoubleQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}
inline void writeDoubleQuotedString(const std::string_view & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s.data(), s.data() + s.size(), buf);
}
/// Outputs a string in backquotes.
inline void writeBackQuotedString(const StringRef & s, WriteBuffer & buf)
{
@ -901,6 +919,7 @@ writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const std::string_view & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const DummyUInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
@ -1001,6 +1020,10 @@ writeQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }
inline void writeQuoted(const String & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const std::string_view & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const StringRef & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('\'', buf);
@ -1043,6 +1066,10 @@ writeDoubleQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }
inline void writeDoubleQuoted(const String & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const std::string_view & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const StringRef & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('"', buf);

View File

@ -0,0 +1,82 @@
#include <gtest/gtest.h>
#include <string>
#include <type_traits>
#include <common/StringRef.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
using namespace DB;
template <typename T, typename U>
void checkString(const T & str, U manip, const std::string & expected)
{
WriteBufferFromOwnString buf;
buf << manip << str;
EXPECT_EQ(expected, buf.str()) << "str type:" << typeid(str).name();
}
TEST(OperatorsManipTest, EscapingTest)
{
checkString("Hello 'world'", escape, "Hello \\'world\\'");
checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); // NOLINT
std::string s1 = "Hello 'world'";
checkString(s1, escape, "Hello \\'world\\'");
std::string s2 = "Hello \\world\\";
checkString(s2, escape, "Hello \\\\world\\\\"); // NOLINT
std::string_view sv1 = s1;
checkString(sv1, escape, "Hello \\'world\\'");
std::string_view sv2 = s2;
checkString(sv2, escape, "Hello \\\\world\\\\"); // NOLINT
StringRef sr1 = s1;
checkString(sr1, escape, "Hello \\'world\\'");
StringRef sr2 = s2;
checkString(sr2, escape, "Hello \\\\world\\\\"); // NOLINT
}
TEST(OperatorsManipTest, QuoteTest)
{
checkString("Hello 'world'", quote, "'Hello \\'world\\''");
std::string s1 = "Hello 'world'";
checkString(s1, quote, "'Hello \\'world\\''");
std::string_view sv1 = s1;
checkString(sv1, quote, "'Hello \\'world\\''");
StringRef sr1 = s1;
checkString(sr1, quote, "'Hello \\'world\\''");
}
TEST(OperatorsManipTest, DoubleQuoteTest)
{
checkString("Hello 'world'", double_quote, "\"Hello 'world'\"");
std::string s1 = "Hello 'world'";
checkString(s1, double_quote, "\"Hello 'world'\"");
std::string_view sv1 = s1;
checkString(sv1, double_quote, "\"Hello 'world'\"");
StringRef sr1 = s1;
checkString(sr1, double_quote, "\"Hello 'world'\"");
}
TEST(OperatorsManipTest, binary)
{
checkString("Hello", binary, "\x5Hello");
std::string s1 = "Hello";
checkString(s1, binary, "\x5Hello");
std::string_view sv1 = s1;
checkString(sv1, binary, "\x5Hello");
StringRef sr1 = s1;
checkString(sr1, binary, "\x5Hello");
}

View File

@ -80,7 +80,7 @@ public:
};
/// Index is used to:
/// * find Node buy it's result_name
/// * find Node by its result_name
/// * specify order of columns in result
/// It represents a set of available columns.
/// Removing of column from index is equivalent to removing of column from final result.

View File

@ -0,0 +1,100 @@
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
namespace DB
{
bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (const auto * f = node->as<ASTFunction>())
{
/// "lambda" visits children itself.
if (f->name == "lambda")
return false;
}
return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>()
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
}
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
// If it's a select query, replace only the filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
if (query->where())
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());
return;
}
if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}
if (auto * node = ast->as<ASTIdentifier>())
{
visit(*node, ast, data);
return;
}
}
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
{
/// Do not add formal parameters of the lambda expression
if (node.name == "lambda")
{
Names local_aliases;
auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node);
for (const auto & name : names_from_lambda)
{
if (data.private_aliases.insert(name).second)
{
local_aliases.push_back(name);
}
}
/// visit child with masked local aliases
Visitor(data).visit(node.arguments->children[1]);
for (const auto & name : local_aliases)
data.private_aliases.erase(name);
}
}
void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
if (auto column_name = IdentifierSemantic::getColumnName(node))
{
if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
return;
const auto & col = data.columns.get(*column_name);
if (col.default_desc.kind == ColumnDefaultKind::Alias)
{
ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);
auto str = queryToString(ast);
// revisit ast to track recursive alias columns
Visitor(data).visit(ast);
}
}
}
}

View File

@ -0,0 +1,81 @@
#pragma once
#include <Core/Names.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class IDataType;
class ASTFunction;
class ASTIdentifier;
using DataTypePtr = std::shared_ptr<const IDataType>;
/// Visits AST nodes to rewrite alias columns in a query.
/// Currently it works in only the 3 ways below.
/// For example:
// CREATE TABLE test_table
// (
// `timestamp` DateTime,
// `value` UInt64,
// `day` Date ALIAS toDate(timestamp),
// `day1` Date ALIAS day + 1,
// `day2` Date ALIAS day1 + 1,
// `time` DateTime ALIAS timestamp
// )ENGINE = MergeTree
// PARTITION BY toYYYYMMDD(timestamp)
// ORDER BY timestamp SETTINGS index_granularity = 1;
/// 1. Rewrite the filters in the query when optimize_respect_aliases is enabled.
/// This can help with `optimize_trivial_count`, partition pruning in `KeyCondition` and secondary indexes.
/// eg: for `select max(value) from test_table where day2 = today()`, the filters will be: ((toDate(timestamp) + 1) + 1) = today().
/// 2. Alias on alias for `required_columns` extracted in `InterpreterSelectQuery.cpp`; this helps to get all dependent physical columns for the query.
/// eg: for `select day2 from test_table`, `required_columns` can get the required columns from the temporarily rewritten AST `((toDate(timestamp) + 1) + 1)`.
/// 3. Help with `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`:
/// For queries with alias columns in `orderBy` and `groupBy`, these ASTs will not change.
/// Instead we generate temporary ASTs and temporary Actions to get the `InputOrderInfo`.
/// eg: select day1 from test_table order by day1;
class ColumnAliasesMatcher
{
public:
using Visitor = InDepthNodeVisitor<ColumnAliasesMatcher, false>;
struct Data
{
const ColumnsDescription & columns;
/// forbidden_columns are from array join, we can't rewrite alias columns involved in array join.
/// Do not analyze joined columns.
/// They may have aliases and come to description as is.
const NameSet & forbidden_columns;
const Context & context;
/// private_aliases are from lambda, so these are local names.
NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
: columns(columns_)
, forbidden_columns(forbidden_columns_)
, context(context_)
{}
};
static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data);
static void visit(ASTFunction & node, ASTPtr & ast, Data & data);
};
using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;
}
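Using the `test_table` from the comment above, the rewrite can be pictured as follows; this is a sketch of the documented behaviour, not output captured from the visitor:

```sql
-- With optimize_respect_aliases = 1, a filter on the alias column...
SELECT max(value) FROM test_table WHERE day2 = today();
-- ...is rewritten internally to physical columns, enabling partition
-- pruning and optimize_trivial_count:
-- WHERE ((toDate(timestamp) + 1) + 1) = today()
```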

View File

@ -62,7 +62,7 @@ public:
using Actions = std::vector<Action>;
/// This map helps to find input position bu it's name.
/// This map helps to find an input position by its name.
/// Key is a view to input::result_name.
/// Result is a list because it is allowed for inputs to have same names.
using NameToInputMap = std::unordered_map<std::string_view, std::list<size_t>>;

View File

@ -53,6 +53,13 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}, context);
}
}
else if ((exists_query = query_ptr->as<ASTExistsViewQuery>()))
{
String database = context.resolveDatabase(exists_query->database);
context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table);
auto tbl = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context);
result = tbl != nullptr && tbl->isView();
}
else if ((exists_query = query_ptr->as<ASTExistsDatabaseQuery>()))
{
String database = context.resolveDatabase(exists_query->database);
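The new branch handles `ASTExistsViewQuery`, which presumably corresponds to a statement of the following shape (a sketch mirroring `EXISTS TABLE`; the database and view names are made up):

```sql
EXISTS VIEW db.my_view;  -- returns 1 only if db.my_view exists and is a view
```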

View File

@ -156,6 +156,10 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, Context &
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsViewQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsDictionaryQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);

View File

@ -106,7 +106,7 @@ Block InterpreterInsertQuery::getSampleBlock(
/// The table does not have a column with that name
if (!table_sample.has(current_name))
throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(),
throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(),
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
if (!allow_materialized && !table_sample_non_materialized.has(current_name))

View File

@ -33,6 +33,7 @@
#include <Interpreters/JoinedTables.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPlan/AddingDelayedSourceStep.h>
@ -1223,6 +1224,7 @@ void InterpreterSelectQuery::executeFetchColumns(
temp_query_info.query = query_ptr;
temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
temp_query_info.sets = query_analyzer->getPreparedSets();
num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
}
if (num_rows)
@ -1329,9 +1331,12 @@ void InterpreterSelectQuery::executeFetchColumns(
if (is_alias)
{
auto column_decl = storage_columns.get(column);
/// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer).
auto cast_column_default = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName());
column_expr = setAlias(cast_column_default->clone(), column);
column_expr = column_default->expression->clone();
/// Recursively visit aliases that refer to other aliases.
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context);
column_expr = addTypeConversionToAST(std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), *context);
column_expr = setAlias(column_expr, column);
}
else
column_expr = std::make_shared<ASTIdentifier>(column);
@ -1543,7 +1548,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context);
}
StreamLocalLimits limits;
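For context, a hedged sketch of the alias-to-alias case handled above, mirroring the alias-rewrite test later in this diff: each alias is expanded recursively, and the CAST to the declared type is added only afterwards.
CREATE TABLE t (ts DateTime, day Date ALIAS toDate(ts), day1 Date ALIAS day + 1) ENGINE = MergeTree ORDER BY ts;
SELECT day1 FROM t; -- day1 -> day + 1 -> toDate(ts) + 1, then CAST to Date only if the types differ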

View File

@ -78,6 +78,9 @@ void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Da
auto & func_arguments = func.arguments->children;
if (func_arguments.size() != 1)
return;
const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
if (!first_arg_func || first_arg_func->arguments->children.empty())
return;
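The guard's practical effect, as exercised by the tests later in this diff: the any()/anyLast() rewrite now bails out unless the single argument is itself a function call with arguments.
SET optimize_move_functions_out_of_any = 1;
EXPLAIN SYNTAX SELECT any(number * number) AS n FROM numbers(100); -- rewritten to any(number) * any(number)
SELECT any(nullIf(s, '')) FROM (SELECT arrayJoin(['', 'Hello']) AS s); -- returns Hello, must not misfire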

View File

@ -230,16 +230,8 @@ void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & columns) const
{
for (auto & col : columns)
{
/// Materialize column.
/// Column is not empty if it is constant, but after Join all constants will be materialized.
/// So, we need remove constants from header.
if (col.column)
col.column = nullptr;
if (leftBecomeNullable(col.type))
col.type = makeNullable(col.type);
}
for (const auto & col : columns_added_by_join)
{

View File

@ -18,6 +18,7 @@
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -427,6 +428,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
}
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There cannot be aggregate functions inside WHERE and PREWHERE.
@ -721,6 +723,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
required_source_columns.swap(source_columns);
}
NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
NameSet forbidden_columns;
for (const auto & elem : array_join_result_to_source)
forbidden_columns.insert(elem.first);
return forbidden_columns;
}
TreeRewriterResultPtr TreeRewriter::analyzeSelect(
ASTPtr & query,
@ -784,6 +793,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// Rewrite filters for select query; must go after getArrayJoinedColumns.
if (settings.optimize_respect_aliases && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
}
result.aggregates = getAggregates(query, *select_query);
result.window_function_asts = getWindowFunctions(query, *select_query);
result.collectUsedColumns(query, true);
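A short illustration of the effect, taken from the alias-rewrite test at the end of this diff: with the setting enabled, a filter on an ALIAS column is rewritten to the underlying expression before analysis, so partition pruning applies.
SET optimize_respect_aliases = 1;
-- day is declared as `day Date ALIAS toDate(timestamp)`; the rewrite makes pruning possible:
SELECT count() FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;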

View File

@ -70,6 +70,7 @@ struct TreeRewriterResult
void collectSourceColumns(bool add_special);
void collectUsedColumns(const ASTPtr & query, bool is_select);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
NameSet getArrayJoinSourceNameSet() const;
const Scalars & getScalars() const { return scalars; }
};

View File

@ -4,11 +4,20 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTWithAlias.h>
#include <Storages/ColumnsDescription.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int THERE_IS_NO_DEFAULT_VALUE;
}
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
{
auto func = makeASTFunction("cast", ast, std::make_shared<ASTLiteral>(type_name));
@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
return func;
}
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context)
{
auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, all_columns);
const auto actions = ExpressionAnalyzer(ast, syntax_analyzer_result, context).getActions(true);
for (const auto & action : actions->getActions())
if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
auto block = actions->getSampleBlock();
auto desc_type = block.getByName(ast->getColumnName()).type;
if (desc_type->getName() != type_name)
return addTypeConversionToAST(std::move(ast), type_name);
return std::move(ast);
}
}
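A hedged SQL sketch of what the new overload means for column defaults (table and column names are hypothetical): the CAST wrapper is added only when the deduced type differs from the declared one, and defaults that would need an ARRAY JOIN are rejected.
CREATE TABLE t1 (a Int32, s String DEFAULT toString(a)) ENGINE = Memory; -- deduced String matches: no CAST added
CREATE TABLE t2 (a Int32, b UInt8 DEFAULT a + 1) ENGINE = Memory; -- a + 1 is likely deduced as Int64: CAST added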

View File

@ -6,8 +6,12 @@
namespace DB
{
class Context;
class NamesAndTypesList;
/// It will produce an expression with CAST to get an AST with the required type.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name);
/// If the type is already the same, the CAST wrapper is omitted.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context);
}

View File

@ -0,0 +1,16 @@
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Storages/ColumnsDescription.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context)
{
ColumnAliasesVisitor::Data alias_column_data(columns, forbidden_columns, context);
ColumnAliasesVisitor alias_column_visitor(alias_column_data);
alias_column_visitor.visit(ast);
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <common/types.h>
#include <Core/Names.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ColumnsDescription;
class Context;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context);
}

View File

@ -37,6 +37,7 @@ SRCS(
ClusterProxy/SelectStreamFactory.cpp
ClusterProxy/executeQuery.cpp
CollectJoinOnKeysVisitor.cpp
ColumnAliasesVisitor.cpp
Context.cpp
CrashLog.cpp
CrossToInnerJoinVisitor.cpp
@ -157,6 +158,7 @@ SRCS(
interpretSubquery.cpp
join_common.cpp
loadMetadata.cpp
processColumnTransformers.cpp
replaceAliasColumnsInQuery.cpp
sortBlock.cpp

View File

@ -262,11 +262,13 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserIdentifier id_parser;
ParserKeyword distinct("DISTINCT");
ParserKeyword all("ALL");
ParserExpressionList contents(false);
ParserSelectWithUnionQuery select;
ParserKeyword over("OVER");
bool has_distinct_modifier = false;
bool has_all = false;
bool has_distinct = false;
ASTPtr identifier;
ASTPtr query;
@ -280,10 +282,34 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
++pos;
auto pos_after_bracket = pos;
auto old_expected = expected;
if (all.ignore(pos, expected))
has_all = true;
if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
else
has_distinct = true;
if (!has_all && all.ignore(pos, expected))
has_all = true;
if (has_all && has_distinct)
return false;
if (has_all || has_distinct)
{
/// Case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x): here ALL and DISTINCT should be treated as identifiers.
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_all = false;
has_distinct = false;
}
}
if (!has_distinct && !has_all)
{
auto old_pos = pos;
auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
@ -371,14 +397,37 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
++pos;
/// Parametric aggregate functions cannot have DISTINCT in parameters list.
if (has_distinct_modifier)
if (has_distinct)
return false;
expr_list_params = expr_list_args;
expr_list_args = nullptr;
pos_after_bracket = pos;
old_expected = expected;
if (all.ignore(pos, expected))
has_all = true;
if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
has_distinct = true;
if (!has_all && all.ignore(pos, expected))
has_all = true;
if (has_all && has_distinct)
return false;
if (has_all || has_distinct)
{
/// Case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x): here ALL and DISTINCT should be treated as identifiers.
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_all = false;
has_distinct = false;
}
}
if (!contents.parse(pos, expr_list_args, expected))
return false;
@ -392,7 +441,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
tryGetIdentifierNameInto(identifier, function_node->name);
/// func(DISTINCT ...) is equivalent to funcDistinct(...)
if (has_distinct_modifier)
if (has_distinct)
function_node->name += "Distinct";
function_node->arguments = expr_list_args;
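In SQL terms, per the ALL/DISTINCT test later in this diff: ALL inside an aggregate's argument list is a no-op, DISTINCT selects the -Distinct combinator, and a lone ALL or DISTINCT right before a comma or closing bracket is re-parsed as an identifier.
SELECT sum(ALL number) FROM numbers(10); -- same as sum(number): 45
SELECT sum(DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); -- sumDistinct: 1
SELECT sum(ALL) FROM (SELECT 1 AS ALL); -- here ALL is an identifier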

View File

@ -30,6 +30,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
node = select_query;
ParserKeyword s_select("SELECT");
ParserKeyword s_all("ALL");
ParserKeyword s_distinct("DISTINCT");
ParserKeyword s_from("FROM");
ParserKeyword s_prewhere("PREWHERE");
@ -93,14 +94,24 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
}
/// SELECT [DISTINCT] [TOP N [WITH TIES]] expr list
/// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list
{
bool has_all = false;
if (!s_select.ignore(pos, expected))
return false;
if (s_all.ignore(pos, expected))
has_all = true;
if (s_distinct.ignore(pos, expected))
select_query->distinct = true;
if (!has_all && s_all.ignore(pos, expected))
has_all = true;
if (has_all && select_query->distinct)
return false;
if (s_top.ignore(pos, expected))
{
ParserNumber num;
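In SQL terms, per the ALL/DISTINCT test later in this diff: SELECT ALL is accepted as the explicit spelling of the default, and combining it with DISTINCT is a parse error.
SELECT ALL 'a'; -- same as SELECT 'a'
SELECT DISTINCT 'a';
-- SELECT ALL DISTINCT 'a' is rejected at parse time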

View File

@ -32,6 +32,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
bool parse_only_database_name = false;
bool parse_show_create_view = false;
bool exists_view = false;
bool temporary = false;
if (s_exists.ignore(pos, expected))
@ -41,6 +42,11 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
query = std::make_shared<ASTExistsDatabaseQuery>();
parse_only_database_name = true;
}
else if (s_view.ignore(pos, expected))
{
query = std::make_shared<ASTExistsViewQuery>();
exists_view = true;
}
else
{
if (s_temporary.ignore(pos, expected))
@ -86,7 +92,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
}
else
{
if (!parse_show_create_view)
if (!(exists_view || parse_show_create_view))
{
if (temporary || s_temporary.ignore(pos, expected))
query->temporary = true;

View File

@ -22,6 +22,15 @@ struct ASTExistsTableQueryIDAndQueryNames
static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE";
};
struct ASTExistsViewQueryIDAndQueryNames
{
static constexpr auto ID = "ExistsViewQuery";
static constexpr auto Query = "EXISTS VIEW";
/// Temporary views are not supported; this entry exists only for parsing.
static constexpr auto QueryTemporary = "";
};
struct ASTExistsDictionaryQueryIDAndQueryNames
{
static constexpr auto ID = "ExistsDictionaryQuery";
@ -69,6 +78,7 @@ struct ASTDescribeQueryExistsQueryIDAndQueryNames
using ASTExistsDatabaseQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames>;
using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>;
using ASTExistsViewQuery = ASTQueryWithTableAndOutputImpl<ASTExistsViewQueryIDAndQueryNames>;
using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>;
using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTableQueryIDAndQueryNames>;
using ASTShowCreateViewQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateViewQueryIDAndQueryNames>;

View File

@ -1,8 +1,11 @@
#include <Storages/ReadInOrderOptimizer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Functions/IFunction.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/TreeRewriter.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
@ -28,11 +31,10 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
/// Do not analyze joined columns.
/// They may have aliases and come to the description as is.
/// We can mismatch them with order key columns at stage of fetching columns.
for (const auto & elem : syntax_result->array_join_result_to_source)
forbidden_columns.insert(elem.first);
forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
}
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot) const
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const
{
Names sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
if (!metadata_snapshot->hasSortingKey())
@ -42,6 +44,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
int read_direction = required_sort_description.at(0).direction;
size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());
auto alias_columns = metadata_snapshot->getColumns().getAliases();
for (size_t i = 0; i < prefix_size; ++i)
{
@ -50,17 +53,27 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
/// Optimize in case of exact match with order key element
/// or in some simple cases when order key element is wrapped into monotonic function.
auto apply_order_judge = [&] (const ExpressionActions::Actions & actions, const String & sort_column)
{
int current_direction = required_sort_description[i].direction;
if (required_sort_description[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
order_key_prefix_descr.push_back(required_sort_description[i]);
/// For the path: order by (sort_column, ...)
if (sort_column == sorting_key_columns[i] && current_direction == read_direction)
{
return true;
}
/// For the path: order by (function(sort_column), ...)
/// Allow only one simple monotonic function with one argument.
/// Why not allow multiple monotonic functions?
else
{
/// Allow only one simple monotonic function with one argument.
bool found_function = false;
for (const auto & action : elements_actions[i]->getActions())
for (const auto & action : actions)
{
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
{
continue;
}
if (found_function)
{
@ -97,18 +110,43 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
current_direction = 0;
if (!current_direction || (i > 0 && current_direction != read_direction))
break;
return false;
if (i == 0)
read_direction = current_direction;
order_key_prefix_descr.push_back(required_sort_description[i]);
return true;
}
};
const auto & actions = elements_actions[i]->getActions();
bool ok;
/// Check if it's an alias column.
/// Currently we only support alias columns without any function wrapper,
/// i.e. `order by aliased_column` can use this optimization, but `order by function(aliased_column)` cannot.
/// This suits most cases.
if (context.getSettingsRef().optimize_respect_aliases && alias_columns.contains(required_sort_description[i].column_name))
{
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context);
auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll());
const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);
const auto & alias_actions = expression_analyzer->getActions();
ok = apply_order_judge(alias_actions, column_expr->getColumnName());
}
else
ok = apply_order_judge(actions, required_sort_description[i].column_name);
if (ok)
order_key_prefix_descr.push_back(required_sort_description[i]);
else
break;
}
if (order_key_prefix_descr.empty())
return {};
return std::make_shared<InputOrderInfo>(std::move(order_key_prefix_descr), read_direction);
}
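A hedged sketch of what this enables, matching the optimize_read_in_order section of the alias test below: ORDER BY an alias column can now reuse the table's sorting key, while ORDER BY function(alias_column) is deliberately left unoptimized.
-- day is `Date ALIAS toDate(timestamp)` and the table is ordered by timestamp:
EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1; -- the plan uses FinishSorting instead of a full sort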

View File

@ -12,6 +12,8 @@ namespace DB
* common prefix, which is needed for
* performing reading in order of PK.
*/
class Context;
class ReadInOrderOptimizer
{
public:
@ -20,7 +22,7 @@ public:
const SortDescription & required_sort_description,
const TreeRewriterResultPtr & syntax_result);
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot) const;
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const;
private:
/// Actions for every element of order expression to analyze functions for monotonicity
@ -28,5 +30,4 @@ private:
NameSet forbidden_columns;
SortDescription required_sort_description;
};
}

View File

@ -195,7 +195,7 @@ void StorageBuffer::read(
if (dst_has_same_structure)
{
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot, context);
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
destination->read(

View File

@ -142,7 +142,7 @@ void StorageMaterializedView::read(
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);

View File

@ -239,7 +239,7 @@ Pipe StorageMerge::read(
{
auto storage_ptr = std::get<0>(*it);
auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot);
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, context);
if (it == selected_tables.begin())
input_sorting_info = current_info;
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))

View File

@ -294,11 +294,17 @@ IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter(
void TinyLogBlockOutputStream::writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams)
{
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = createStreamGetter(name, written_streams);
if (serialize_states.count(name) == 0)
{
/// Some stream getters may be called from `serializeBinaryBulkStatePrefix`.
/// Use a different WrittenStreams set, or we would get nullptr for them in `serializeBinaryBulkWithMultipleStreams`.
WrittenStreams prefix_written_streams;
settings.getter = createStreamGetter(name, prefix_written_streams);
type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]);
}
settings.getter = createStreamGetter(name, written_streams);
type.serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]);
}
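The regression this guards against shows up with columns materialized from DEFAULT expressions; a reduced sketch from the perf_lc_num test later in this diff:
CREATE TABLE perf_lc_num (num UInt8, arr Array(LowCardinality(Int64)) DEFAULT [num]) ENGINE = TinyLog;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10);
SELECT sum(length(arr)) FROM perf_lc_num; -- stream getters must not come back nullptr during the prefix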

View File

@ -60,6 +60,7 @@ namespace
void signalHandler(int, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
/// In case malicious user is sending signals manually (for unknown reason).

View File

@ -137,10 +137,10 @@ DROP TABLE IF EXISTS bitmap_column_expr_test3;
CREATE TABLE bitmap_column_expr_test3
(
tag_id String,
z AggregateFunction(groupBitmap, UInt32),
z AggregateFunction(groupBitmap, UInt64),
replace Nested (
from UInt32,
to UInt32
from UInt16,
to UInt64
)
)
ENGINE = MergeTree
@ -149,10 +149,10 @@ ORDER BY tag_id;
DROP TABLE IF EXISTS numbers10;
CREATE VIEW numbers10 AS SELECT number FROM system.numbers LIMIT 10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag1', groupBitmapState(toUInt32(number)), cast([] as Array(UInt32)), cast([] as Array(UInt32)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag2', groupBitmapState(toUInt32(number)), cast([0] as Array(UInt32)), cast([2] as Array(UInt32)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag3', groupBitmapState(toUInt32(number)), cast([0,7] as Array(UInt32)), cast([3,101] as Array(UInt32)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag4', groupBitmapState(toUInt32(number)), cast([5,999,2] as Array(UInt32)), cast([2,888,20] as Array(UInt32)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag1', groupBitmapState(toUInt64(number)), cast([] as Array(UInt16)), cast([] as Array(UInt64)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag2', groupBitmapState(toUInt64(number)), cast([0] as Array(UInt16)), cast([2] as Array(UInt64)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag3', groupBitmapState(toUInt64(number)), cast([0,7] as Array(UInt16)), cast([3,101] as Array(UInt64)) FROM numbers10;
INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag4', groupBitmapState(toUInt64(number)), cast([5,999,2] as Array(UInt16)), cast([2,888,20] as Array(UInt64)) FROM numbers10;
SELECT tag_id, bitmapToArray(z), replace.from, replace.to, bitmapToArray(bitmapTransform(z, replace.from, replace.to)) FROM bitmap_column_expr_test3 ORDER BY tag_id;
@ -232,11 +232,11 @@ select bitmapHasAll(bitmapBuild([
-- bitmapContains:
---- Empty
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt32(0));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt32(5));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt8(0));
SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt16(5));
---- Small
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(0));
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(9));
select bitmapContains(bitmapBuild([1,5,7,9]),toUInt64(9));
---- Large
select bitmapContains(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
@ -250,31 +250,31 @@ select bitmapContains(bitmapBuild([
-- bitmapSubsetInRange:
---- Empty
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt32()), toUInt64(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt64(10)));
---- Small
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt8(0), toUInt16(4)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt64(10)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt64(3), toUInt32(7)));
---- Large
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(0), toUInt32(100)));
100,200,500]), toUInt8(0), toUInt32(100)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(30), toUInt32(200)));
100,200,500]), toUInt64(30), toUInt32(200)));
select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));
100,200,500]), toUInt32(100), toUInt64(200)));
-- bitmapSubsetLimit:
---- Empty
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt8(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt64(10)));
---- Small
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt8(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt64(10)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt16(3), toUInt32(7)));
---- Large
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
@ -284,7 +284,7 @@ select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
100,200,500]), toUInt32(30), toUInt32(200)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));
100,200,500]), toUInt32(100), toUInt16(200)));
-- bitmapMin:
---- Empty

View File

@ -1,5 +1,8 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (num UInt64, str String) ENGINE = MergeTree ORDER BY num;
INSERT INTO test (num) VALUES (1), (2), (10), (15), (23);
SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt8(num));
SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt16(num));
SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt32(num));
SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt64(num));
DROP TABLE test;

View File

@ -21,6 +21,13 @@
0
0
0
1
0
0
0
0
0
0
0
0
0

View File

@ -40,6 +40,20 @@ EXISTS db_01048.t_01048;
EXISTS TABLE db_01048.t_01048;
EXISTS DICTIONARY db_01048.t_01048;
CREATE TABLE db_01048.t_01048_2 (x UInt8) ENGINE = Memory;
CREATE VIEW db_01048.v_01048 AS SELECT * FROM db_01048.t_01048_2;
EXISTS VIEW db_01048.v_01048;
EXISTS VIEW db_01048.t_01048_2;
EXISTS VIEW db_01048.v_not_exist;
DROP VIEW db_01048.v_01048;
EXISTS VIEW db_01048.v_01048;
EXISTS VIEW db_01048.t_01048_2;
EXISTS VIEW db_01048.v_not_exist;
EXISTS VIEW db_not_exists.v_not_exist;
DROP TABLE db_01048.t_01048_2;
DROP DATABASE db_01048;
EXISTS db_01048.t_01048;
EXISTS TABLE db_01048.t_01048;

View File

@ -1,3 +1,5 @@
SET optimize_move_functions_out_of_any = 1;
SELECT any(number * number) AS n FROM numbers(100) FORMAT CSVWithNames;
EXPLAIN SYNTAX SELECT any(number * number) AS n FROM numbers(100);

View File

@ -27,9 +27,9 @@ SELECT
avg(k)
FROM columns_transformers
SELECT
toDate(any(i)),
toDate(any(j)),
toDate(any(k))
any(toDate(i)),
any(toDate(j)),
any(toDate(k))
FROM columns_transformers AS a
SELECT
length(toString(j)),
@ -44,9 +44,9 @@ SELECT
avg(k)
FROM columns_transformers
SELECT
toDate(any(i)),
toDate(any(j)),
toDate(any(k))
any(toDate(i)),
any(toDate(j)),
any(toDate(k))
FROM columns_transformers AS a
SELECT
sum(i + 1 AS i),
@ -59,9 +59,9 @@ SELECT
avg(k)
FROM columns_transformers
SELECT
toDate(any(i)),
toDate(any(j)),
toDate(any(k))
any(toDate(i)),
any(toDate(j)),
any(toDate(k))
FROM columns_transformers AS a
SELECT
(i + 1) + 1 AS i,

View File

@ -0,0 +1,74 @@
test-partition-prune
1
1
1
1
1
test-join
1
1
alias2alias
1
1
1
1
1
1
1
array-join
1
0 0
lambda
1
optimize_read_in_order
Expression (Projection)
Limit (preliminary LIMIT)
MergingSorted (Merge sorted streams for ORDER BY)
MergeSorting (Merge sorted blocks for ORDER BY)
PartialSorting (Sort each block for ORDER BY)
Expression (Before ORDER BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (MergeTree)
Expression (Projection)
Limit (preliminary LIMIT)
FinishSorting
Expression (Before ORDER BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
Expression (Projection)
Limit (preliminary LIMIT)
FinishSorting
Expression (Before ORDER BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
optimize_aggregation_in_order
Expression (Projection + Before ORDER BY)
Aggregating
Expression (Before GROUP BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (MergeTree)
Expression (Projection + Before ORDER BY)
Aggregating
Expression (Before GROUP BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
Expression (Projection + Before ORDER BY)
Aggregating
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
second-index
1
1

View File

@ -0,0 +1,117 @@
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
`timestamp` DateTime,
`value` UInt64,
`day` Date ALIAS toDate(timestamp),
`day1` Date ALIAS day + 1,
`day2` Date ALIAS day1 + 1,
`time` DateTime ALIAS timestamp
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY timestamp SETTINGS index_granularity = 1;
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
set optimize_respect_aliases = 1;
SELECT 'test-partition-prune';
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day, day AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT 'test-join';
SELECT day = '2020-01-03'
FROM
(
SELECT toDate('2020-01-03') AS day
FROM numbers(1)
) AS a
INNER JOIN
(
SELECT day
FROM test_table
WHERE day = '2020-01-03'
GROUP BY day
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
SELECT day = '2020-01-01'
FROM
(
SELECT day
FROM test_table
WHERE day = '2020-01-01'
GROUP BY day
) AS a
INNER JOIN
(
SELECT toDate('2020-01-01') AS day
FROM numbers(1)
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
SELECT 'alias2alias';
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day1 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT t = '2020-01-03' FROM (SELECT day2 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-03' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day1, day1 AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10;
ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3];
ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array;
SELECT 'array-join';
set max_rows_to_read = 10;
SELECT count() == 10 FROM test_table WHERE day = '2020-01-01';
SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01');
SELECT 'lambda';
-- lambda parameters in the filter should not be rewritten
SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03';
set max_rows_to_read = 0;
SELECT 'optimize_read_in_order';
EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0;
EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1;
EXPLAIN SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1;
SELECT 'optimize_aggregation_in_order';
EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0;
EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1;
EXPLAIN SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1;
DROP TABLE test_table;
SELECT 'second-index';
DROP TABLE IF EXISTS test_index;
CREATE TABLE test_index
(
`key_string` String,
`key_uint32` ALIAS toUInt32(key_string),
INDEX idx toUInt32(key_string) TYPE set(0) GRANULARITY 1
)
ENGINE = MergeTree
PARTITION BY tuple()
PRIMARY KEY tuple()
ORDER BY key_string SETTINGS index_granularity = 1;
INSERT INTO test_index SELECT * FROM numbers(10);
set max_rows_to_read = 1;
SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1;
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1;
DROP TABLE IF EXISTS test_index;

View File

@ -0,0 +1,5 @@
1
2020-01-01 10:00:00
2020-01-01 10:00:00
1
2020-01-01 10:00:00

View File

@ -0,0 +1,37 @@
DROP TABLE IF EXISTS foo;
DROP TABLE IF EXISTS bar;
CREATE TABLE foo (server_date Date, server_time Datetime('Europe/Moscow'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date);
CREATE TABLE bar (server_date Date, dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date);
INSERT INTO foo VALUES ('2020-01-01', '2020-01-01 12:00:00', 'test1'), ('2020-01-01', '2020-01-01 13:00:00', 'test2');
INSERT INTO bar VALUES ('2020-01-01', 'test2'), ('2020-01-01', 'test3');
SET optimize_move_to_prewhere = 1;
SET any_join_distinct_right_table_keys = 0;
SELECT count()
FROM foo ANY INNER JOIN bar USING (dimension_1)
WHERE (foo.server_date <= '2020-11-07') AND (toDate(foo.server_time, 'Asia/Yekaterinburg') <= '2020-11-07');
SELECT toDateTime(foo.server_time, 'UTC')
FROM foo
ANY INNER JOIN bar USING (dimension_1)
WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30');
SELECT toDateTime(foo.server_time, 'UTC') FROM foo
SEMI JOIN bar USING (dimension_1) WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30');
SET any_join_distinct_right_table_keys = 1;
SELECT count()
FROM foo ANY INNER JOIN bar USING (dimension_1)
WHERE (foo.server_date <= '2020-11-07') AND (toDate(foo.server_time, 'Asia/Yekaterinburg') <= '2020-11-07');
SELECT toDateTime(foo.server_time, 'UTC')
FROM foo
ANY INNER JOIN bar USING (dimension_1)
WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30');
DROP TABLE foo;
DROP TABLE bar;

View File

@ -0,0 +1 @@
1 7 8

View File

@ -0,0 +1,7 @@
insert into table function file("data1622.json", "TSV", "value String") VALUES ('{"a":1}');
drop table if exists json;
create table json(a int, b int default 7, c default a + b) engine File(JSONEachRow, 'data1622.json');
set input_format_defaults_for_omitted_fields = 1;
select * from json;
truncate table json;
drop table if exists json;

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
PORT="$(($RANDOM%63000+2001))"
TEMP_FILE="$CURDIR/01622_defaults_for_url_engine.tmp"
function thread1
{
while true; do
echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1;
done
}
function thread2
{
while true; do
$CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7, c default a + b') format Values"
done
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
TIMEOUT=5
timeout $TIMEOUT bash -c "thread1 $PORT" > /dev/null 2>&1 &
timeout $TIMEOUT bash -c "thread2 $PORT" 2> /dev/null > $TEMP_FILE &
wait
grep -q '(1,7,8)' "$TEMP_FILE" && echo "Ok"

View File

@ -0,0 +1,35 @@
a
a
1
1
2
45
45
45
2
1
1
1
a
aa
aaa
aaaa
aaaaa
aaaaaa
aaaaaaa
aaaaaaaa
aaaaaaaaa
a
aa
aaa
aaaa
aaaaa
aaaaaa
aaaaaaa
aaaaaaaa
aaaaaaaaa
aaaaa
aaaaa
aaaaa

View File

@ -0,0 +1,25 @@
SELECT ALL 'a';
SELECT DISTINCT 'a';
SELECT ALL * FROM (SELECT 1 UNION ALL SELECT 1);
SELECT DISTINCT * FROM (SELECT 2 UNION ALL SELECT 2);
SELECT sum(number) FROM numbers(10);
SELECT sum(ALL number) FROM numbers(10);
SELECT sum(DISTINCT number) FROM numbers(10);
SELECT sum(ALL x) FROM (SELECT 1 x UNION ALL SELECT 1);
SELECT sum(DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1);
SELECT sum(ALL) FROM (SELECT 1 AS ALL);
SELECT sum(DISTINCT) FROM (SELECT 1 AS DISTINCT);
SELECT repeat('a', ALL) FROM (SELECT number AS ALL FROM numbers(10));
SELECT repeat('a', DISTINCT) FROM (SELECT number AS DISTINCT FROM numbers(10));
SELECT repeat(ALL, 5) FROM (SELECT 'a' AS ALL);
SELECT repeat(DISTINCT, 5) FROM (SELECT 'a' AS DISTINCT);
SELECT repeat(ALL, DISTINCT) FROM (SELECT 'a' AS ALL, 5 AS DISTINCT);

View File

@ -0,0 +1 @@
Hello

View File

@ -0,0 +1,6 @@
SELECT any(nullIf(s, '')) FROM (SELECT arrayJoin(['', 'Hello']) AS s);
SET optimize_move_functions_out_of_any = 0;
EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 }
SET optimize_move_functions_out_of_any = 1;
EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 }

View File

@ -0,0 +1,3 @@
['Hello','World','Упячка']
['Hello','World','World','Упячка','Упячка','Упячка']
['world','hello'] Array(Enum8(\'world\' = 0, \'hello\' = 1)) ['world','hello'] Array(Enum8(\'world\' = 0, \'hello\' = 1))

View File

@ -0,0 +1,13 @@
SELECT arraySort(groupUniqArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x);
SELECT arraySort(groupArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x);
SELECT
arraySort(groupUniqArray(val)) AS uniq,
toTypeName(uniq),
arraySort(groupArray(val)) AS arr,
toTypeName(arr)
FROM
(
SELECT CAST(number % 2, 'Enum(\'hello\' = 1, \'world\' = 0)') AS val
FROM numbers(2)
);

View File

@ -0,0 +1,12 @@
10000000
10000000 1274991808
20000000
20000000 2549983616
10000000
10000000 1274991808
20000000
20000000 2549983616
10000000
10000000 1274991808
20000000
20000000 2549983616

View File

@ -0,0 +1,47 @@
drop table if exists perf_lc_num;
CREATE TABLE perf_lc_num(num UInt8, arr Array(LowCardinality(Int64)) default [num]) ENGINE = TinyLog;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
drop table if exists perf_lc_num;
CREATE TABLE perf_lc_num(num UInt8, arr Array(LowCardinality(Int64)) default [num]) ENGINE = Log;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
drop table if exists perf_lc_num;
CREATE TABLE perf_lc_num(num UInt8, arr Array(LowCardinality(Int64)) default [num]) ENGINE = StripeLog;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
select sum(length(arr)) from perf_lc_num;
select sum(length(arr)), sum(num) from perf_lc_num;
drop table if exists perf_lc_num;

View File

@ -0,0 +1,6 @@
set allow_suspicious_low_cardinality_types = 1;
CREATE TABLE lc_null_int8_defnull (val LowCardinality(Nullable(Int8)) DEFAULT NULL) ENGINE = MergeTree order by tuple();
SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_null_int8_defnull AS values;
SELECT ignore(toLowCardinality(1), toLowCardinality(2), 3);