Merge remote-tracking branch 'origin' into add-sqlancer-test-docker

This commit is contained in:
Yatsishin Ilya 2021-01-15 15:25:46 +03:00
commit ea1f15f619
200 changed files with 3501 additions and 866 deletions

View File

@ -112,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)
/** Signal handler for HUP / USR1 */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
@ -125,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
char buf[signal_pipe_buf_size];
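All of these handlers follow the same async-signal-safe pattern: write a small fixed-size record to a pipe and let an ordinary thread do the real work. A minimal standalone sketch (the pipe setup and record layout here are illustrative, not the real ClickHouse plumbing):

``` cpp
#include <signal.h>
#include <unistd.h>
#include <cerrno>
#include <cstring>

/// Write end of a pipe created at startup; a dedicated thread reads the other
/// end and does the real work (reopening logs, initiating shutdown, etc.).
static int signal_pipe_fd = -1;

static void minimalSignalHandler(int sig, siginfo_t *, void *)
{
    int saved_errno = errno;            /// write() may clobber errno
    char buf[sizeof(int)];
    std::memcpy(buf, &sig, sizeof(int));
    /// write() is async-signal-safe; no allocation, no locks, no stdio.
    ssize_t ignored = ::write(signal_pipe_fd, buf, sizeof(buf));
    (void)ignored;                      /// nothing safe to do on error inside a handler
    errno = saved_errno;
}
```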

View File

@ -3,7 +3,7 @@ SET(VERSION_REVISION 54445)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
SET(VERSION_DESCRIBE v21.1.1.5643-prestable)
SET(VERSION_STRING 21.1.1.5643)
SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
SET(VERSION_STRING 21.1.1.5646)
# end of autochange

View File

@ -1,5 +1,4 @@
# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope
if (NOT ARCH_ARM AND NOT OS_FREEBSD AND OPENSSL_FOUND)
if (NOT ARCH_ARM AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")

View File

@ -1,2 +1,2 @@
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz --strip-components=1
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 95650a0db4399ee871d5fd698ad12384fe9fa964
Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7

View File

@ -5,6 +5,8 @@ set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
set(SRCS
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
${LIBCXX_SOURCE_DIR}/src/any.cpp
${LIBCXX_SOURCE_DIR}/src/atomic.cpp
${LIBCXX_SOURCE_DIR}/src/barrier.cpp
${LIBCXX_SOURCE_DIR}/src/bind.cpp
${LIBCXX_SOURCE_DIR}/src/charconv.cpp
${LIBCXX_SOURCE_DIR}/src/chrono.cpp
@ -20,6 +22,7 @@ ${LIBCXX_SOURCE_DIR}/src/functional.cpp
${LIBCXX_SOURCE_DIR}/src/future.cpp
${LIBCXX_SOURCE_DIR}/src/hash.cpp
${LIBCXX_SOURCE_DIR}/src/ios.cpp
${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp
${LIBCXX_SOURCE_DIR}/src/iostream.cpp
${LIBCXX_SOURCE_DIR}/src/locale.cpp
${LIBCXX_SOURCE_DIR}/src/memory.cpp
@ -28,6 +31,7 @@ ${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
${LIBCXX_SOURCE_DIR}/src/new.cpp
${LIBCXX_SOURCE_DIR}/src/optional.cpp
${LIBCXX_SOURCE_DIR}/src/random.cpp
${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp
${LIBCXX_SOURCE_DIR}/src/regex.cpp
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit 1ebc83af4c06dbcd56b4d166c1314a7d4c1173f9
Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076

View File

@ -11,7 +11,6 @@ ${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp

View File

@ -83,7 +83,8 @@
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
#define _TTHREAD_EMULATE_TIMESPEC_GET_
#endif
#elif defined(__FreeBSD__)
#define HAVE_PTHREAD_SETNAME_FREEBSD 1
#else
// pthread_setname_gnu
#define HAVE_PTHREAD_SETNAME_GNU 1
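For reference, a standalone sketch of the two calls these feature macros choose between (illustrative only; the vendored library's actual call sites differ):

``` cpp
#include <pthread.h>
#if defined(__FreeBSD__)
#include <pthread_np.h>
#endif

void setCurrentThreadName(const char * name)
{
#if defined(__FreeBSD__)
    pthread_set_name_np(pthread_self(), name);  /// FreeBSD spelling, returns void
#else
    pthread_setname_np(pthread_self(), name);   /// GNU spelling; glibc limits the name to 15 chars + NUL
#endif
}
```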

View File

@ -45,7 +45,8 @@
"name": "yandex/clickhouse-stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/coverage"
"docker/test/coverage",
"docker/test/unit"
]
},
"docker/test/stateless_pytest": {
@ -134,7 +135,9 @@
"name": "yandex/clickhouse-test-base",
"dependent": [
"docker/test/stateless",
"docker/test/stateless_pytest"
"docker/test/stateless_unbundled",
"docker/test/stateless_pytest",
"docker/test/integration/base"
]
},
"docker/packager/unbundled": {

View File

@ -82,7 +82,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named "gcc", but we actually use clang for cross-compiling.

View File

@ -3,7 +3,7 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

View File

@ -4,5 +4,5 @@ alpine-root/install/*
# docs (looks useless)
alpine-root/usr/share/doc/*
# packages, etc. (used by prepare.sh)
alpine-root/tgz-packages/*
# packages, etc. (used by alpine-build.sh)
tgz-packages/*

View File

@ -1 +1,2 @@
alpine-root/*
alpine-root/*
tgz-packages/*

View File

@ -16,7 +16,7 @@ RUN addgroup clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod 775 /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec
&& apk add --no-cache su-exec bash
EXPOSE 9000 8123 9009

View File

@ -4,6 +4,7 @@ set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"
# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
# clean up the root from old runs
# clean up the root from old runs; it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done
# unpack tars
@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/lib64"
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,152 +0,0 @@
#!/bin/sh
#set -x
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
DO_CHOWN=0
fi
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
# busybox has setuidgid & chpst built in
gosu="su-exec $USER:$GROUP"
else
USER="$(id -u)"
GROUP="$(id -g)"
gosu=""
DO_CHOWN=0
fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
for dir in "$DATA_DIR" \
"$ERROR_LOG_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH"
do
# skip if the variable is empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directory permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
exit 1
fi
done
# if clickhouse user is defined - create it (user "default" already exists out of the box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
<default remove="remove">
</default>
<${CLICKHOUSE_USER}>
<profile>default</profile>
<networks>
<ip>::/0</ip>
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
</${CLICKHOUSE_USER}>
</users>
</yandex>
EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
tries=6
while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ -n "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD "
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
fi
# if no args passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi
# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image
exec "$@"

71
docker/server/entrypoint.sh Normal file → Executable file
View File

@ -1,7 +1,10 @@
#!/bin/bash
set -eo pipefail
shopt -s nullglob
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
DO_CHOWN=0
fi
@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ x"$UID" == x0 ]; then
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
gosu="gosu $USER:$GROUP"
if command -v gosu &> /dev/null; then
gosu="gosu $USER:$GROUP"
elif command -v su-exec &> /dev/null; then
gosu="su-exec $USER:$GROUP"
else
echo "No gosu/su-exec detected!"
exit 1
fi
else
USER="$(id -u)"
GROUP="$(id -g)"
@ -23,18 +33,23 @@ fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
echo "Configuration file '$dir' isn't readable by user with id '$USER'"
exit 1
fi
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
LOG_DIR="$(dirname $LOG_PATH || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
LOG_DIR="$(dirname "$LOG_PATH" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -58,8 +73,8 @@ do
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directory permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
exit 1
fi
done
@ -90,21 +105,22 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
# will try to ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
@ -140,7 +157,7 @@ fi
# if no args passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi
# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image

View File

@ -329,6 +329,7 @@ function run_tests
# nc - command not found
01601_proxy_protocol
01622_defaults_for_url_engine
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -66,3 +66,6 @@ function run_tests()
export -f run_tests
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

View File

@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -7,3 +7,4 @@ RUN apt-get install gdb
CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt

View File

@ -42,9 +42,9 @@ Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
## Build ClickHouse {#build-clickhouse}

View File

@ -0,0 +1,67 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
Contains information about distributed DDL queries (`ON CLUSTER` queries) that were executed on a cluster.
Columns:
- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address the hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host port.
- `status` ([Enum](../../sql-reference/data-types/enum.md)) — Status of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution in milliseconds.
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) — Exception code from ZooKeeper.
**Example**
``` sql
SELECT *
FROM system.distributed_ddl_queue
WHERE cluster = 'test_cluster'
LIMIT 2
FORMAT Vertical
Query id: f544e72a-6641-43f1-836b-24baa1c9632a
Row 1:
──────
entry: query-0000000000
host_name: clickhouse01
host_address: 172.23.0.11
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
Row 2:
──────
entry: query-0000000000
host_name: clickhouse02
host_address: 172.23.0.12
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue.md) <!--hide-->

View File

@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
- For a MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL with `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
- The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).

View File

@ -583,7 +583,7 @@ Example of settings:
or
``` sql
SOURCE(MONGO(
SOURCE(MONGODB(
host 'localhost'
port 27017
user ''

View File

@ -23,6 +23,7 @@ The following actions are supported:
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes the column's type, default expression, and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
These actions are described in detail below.
@ -145,6 +146,26 @@ The `ALTER` query is atomic. For MergeTree tables it is also lock-free.
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.
## MODIFY COLUMN REMOVE {#modify-remove}
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
- [REMOVE TTL](ttl.md).
## Limitations {#alter-query-limitations}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.

View File

@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. The partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of the partition in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

View File

@ -3,10 +3,83 @@ toc_priority: 44
toc_title: TTL
---
### Manipulations with Table TTL {#manipulations-with-table-ttl}
# Manipulations with Table TTL {#manipulations-with-table-ttl}
## MODIFY TTL {#modify-ttl}
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a query of the following form:
``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
ALTER TABLE table_name MODIFY TTL ttl_expression;
```
## REMOVE TTL {#remove-ttl}
The `TTL` property can be removed from the table with the following query:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Example**
Consider a table with a table-level `TTL`:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Run `OPTIMIZE` to force `TTL` cleanup:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The second row was deleted from the table.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Now remove the table `TTL` with the following query:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The `TTL` is no longer there, so the second row is not deleted:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### See Also
- More about the [TTL expression](../../../sql-reference/statements/create/table.md#ttl-expression).
- Modifying a column [with TTL](../../../sql-reference/statements/alter/column.md#alter_modify-column).

View File

@ -13,9 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with a column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
For example, consider the table:
@ -23,9 +21,8 @@ For example, consider the table:
SHOW CREATE insert_select_testtable;
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
```text
CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
SETTINGS index_granularity = 8192
```
``` sql

View File

@ -0,0 +1,21 @@
---
toc_title: ALL
---
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception will be thrown.
`ALL` can also be specified inside an aggregate function with the same effect (noop), for instance:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
is equivalent to
```sql
SELECT sum(number) FROM numbers(10);
```

View File

@ -18,10 +18,6 @@ It is possible to obtain the same result by applying [GROUP BY](../../../sql-ref
- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish running.
## Limitations {#limitations}
`DISTINCT` is not supported if `SELECT` has at least one array column.
## Examples {#examples}
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.

View File

@ -44,9 +44,9 @@ Also, we need to download the macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -44,9 +44,9 @@ Also, we need to download the macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -45,9 +45,9 @@ make install
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -133,7 +133,7 @@ ClickHouse is strongly typed, so there is no
## Aggregate Functions {#aggregate-functions}
Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to retrieve results from that state. They are managed through the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just one person `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and the `HyperLogLog` probabilistic data structure).
Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to retrieve results from that state. They are managed through the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just a single variable of type `UInt64`) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and the `HyperLogLog` probabilistic data structure).
States are allocated in an `Arena` (a memory pool) to handle multiple states while executing a high-cardinality `GROUP BY` query (with a large number of unique keys). States can have a non-trivial constructor and destructor: for example, complex aggregate states can allocate additional memory themselves. Therefore, extra attention should be paid to creating and destroying states and to correctly passing ownership and destruction order.

View File

@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
When updating dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md):
> - For a text file, the modification time is checked. If it differs from the previously recorded time, the dictionary is updated.
> - For MyISAM tables, the modification time is checked with a `SHOW TABLE STATUS` query.
> - For a MySQL source, the modification time is checked with a `SHOW TABLE STATUS` query (for MySQL 8, you need to disable meta-information caching in MySQL with `set global information_schema_stats_expiry=0`).
> - Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC, and ClickHouse sources, you can set up a query that updates the dictionaries only if they have really changed, rather than each time. To do this, the following conditions/actions are required:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that updates the dictionaries only if they have really changed, rather than each time. To do this, the following conditions/actions are required:
> - The dictionary table must have a field that is guaranteed to change when the source data is updated.
> - The source settings must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a string; if this string has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field of the [source](external-dicts-dict-sources.md) settings.

View File

@ -12,6 +12,7 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
- [CLEAR COLUMN](#alter_clear-column) — resets all values in a column for a given partition;
- [COMMENT COLUMN](#alter_comment-column) — adds a comment to the column;
- [MODIFY COLUMN](#alter_modify-column) — changes the column's type, default expression, and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — removes one of the column's properties.
A detailed description of each action is given below.
@ -135,6 +136,28 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, and then each replica applies them. All `ALTER` queries are executed in the same order. The query waits for the corresponding actions to be completed on all replicas. However, a query that changes columns in a replicated table can be interrupted, and all actions will then be performed asynchronously.
## MODIFY COLUMN REMOVE {#modify-remove}
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
Removing the TTL property:
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
- [REMOVE TTL](ttl.md).
## Limitations of the ALTER Query {#ogranicheniia-zaprosa-alter}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is fully equivalent to multiple array columns with names sharing the same prefix before the dot.

View File

@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
To specify the target partition in `ALTER ... PARTITION` queries, you can use:
- The partition name. You can look up partition names in the `partition` column of the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) system table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- An arbitrary expression over the source table's columns. Constants and constant expressions are also supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- A tuple of expressions or constants that matches (in types) the partitioning key tuple. For a single-element partitioning key, the expression should be wrapped in the `tuple(...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- The string partition ID. The partition ID is used to name partition parts on the file system and in ZooKeeper. In `ALTER` queries, the partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- For [ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries: to specify a part name, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) system table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
@ -306,4 +306,4 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
Examples of `ALTER ... PARTITION` queries can be found in the tests: [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->

View File

@ -5,10 +5,82 @@ toc_title: TTL
# Manipulations with Table TTL {#manipuliatsii-s-ttl-tablitsy}
## MODIFY TTL {#modify-ttl}
You can change the [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) with a query of the following form:
``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
```
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->
## REMOVE TTL {#remove-ttl}
The table TTL can be removed with a query of the following form:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Example**
Let's create a table with a table-level `TTL` and fill it with data:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Run `OPTIMIZE` to force the `TTL` cleanup:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
As a result, you can see that the second row was deleted.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Now remove the table `TTL`:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
The `TTL` is no longer there, so the data is not deleted:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### See Also
- More about the [TTL property](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->

View File

@ -13,9 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the following syntax: `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)`.
Instead of listing all the required columns, you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with an [asterisk](../../sql-reference/statements/select/index.md#asterisk) and/or modifiers such as `APPLY`, `EXCEPT`, `REPLACE`.
As an example, consider the table:

View File

@ -18,10 +18,6 @@ toc_title: DISTINCT
- When the [ORDER BY](order-by.md) clause is omitted and the [LIMIT](limit.md) clause is present, the query stops running as soon as the required number of distinct rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish.
## Limitations {#limitations}
`DISTINCT` is not supported if `SELECT` has at least one array column.
## Examples {#examples}
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.

View File

@ -33,8 +33,8 @@ cd cctools-port/cctools
make install
cd ${CCTOOLS}
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.15.sdk.tar.xz
```
# Build ClickHouse {#bian-yi-clickhouse}
@ -46,7 +46,7 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \
-DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk
-DSDK_PATH=${CCTOOLS}/MacOSX10.15.sdk
ninja -C build-osx
```

View File

@ -29,7 +29,7 @@ SELECT 1 - 0.9
- When reading floating-point numbers row by row, the result may not be the nearest machine-representable number.
## and Inf {#data_type-float-nan-inf}
## NaN and Inf {#data_type-float-nan-inf}
Compared with standard SQL, ClickHouse supports the following categories of floating-point numbers:

View File

@ -287,7 +287,7 @@
</div>
<div id="run_div">
<button class="shadow" id="run">Run</button>
<span class="hint">&nbsp;(Ctrl+Enter)</span>
<span class="hint">&nbsp;(Ctrl/Cmd+Enter)</span>
<span id="hourglass"></span>
<span id="check-mark"></span>
<span id="stats"></span>
@ -424,10 +424,10 @@
post();
}
document.onkeypress = function(event)
document.onkeydown = function(event)
{
/// Firefox has code 13 for Enter and Chromium has code 10.
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) {
post();
}
}

View File

@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
@ -121,7 +120,6 @@ public:
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
@ -129,7 +127,7 @@ public:
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }
void insert(Data & a, const T & v, Arena * arena) const
{

View File

@ -168,7 +168,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
buffer.push_back(x.getValue());
}
@ -264,7 +264,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
++ret;
}
}
@ -419,7 +419,7 @@ public:
if (isSmall())
return small.find(x) != small.end();
else
return rb->contains(x);
return rb->contains(static_cast<Value>(x));
}
/**
@ -613,7 +613,7 @@ public:
/**
* Replace value
*/
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{
if (isSmall())
toLarge();
@ -622,9 +622,9 @@ public:
{
if (from_vals[i] == to_vals[i])
continue;
bool changed = rb->removeChecked(from_vals[i]);
bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
if (changed)
rb->add(to_vals[i]);
rb->add(static_cast<Value>(to_vals[i]));
}
}
};

View File

@ -56,7 +56,7 @@ public:
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
return std::make_shared<DataTypeArray>(this->argument_types[0]);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override

View File

@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
{
}
MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
{
if (size == 0)
return cloneEmpty();
size_t from_size = data.size();
if (size <= from_size)
{
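/// Shrinking branch: createView() shares ownership of this column (and of the arenas holding the aggregate states), so the state pointers copied below remain valid.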
auto res = createView();
auto & res_data = res->data;
res_data.assign(data.begin(), data.begin() + size);
return res;
}
else
{
/// Create a new column to return.
MutableColumnPtr cloned_col = cloneEmpty();
auto * res = typeid_cast<ColumnAggregateFunction *>(cloned_col.get());
res->insertRangeFrom(*this, 0, from_size);
for (size_t i = from_size; i < size; ++i)
res->insertDefault();
return cloned_col;
}
}
}

View File

@ -215,7 +215,7 @@ public:
void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn &) const override;
MutableColumnPtr cloneResized(size_t size) const override;
};
}

View File

@ -12,6 +12,10 @@
#include <random>
#include <cstdlib>
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
thread_local bool _memory_tracker_always_throw_logical_error_on_allocation = false;
#endif
namespace
{
@ -165,6 +169,14 @@ void MemoryTracker::alloc(Int64 size)
}
}
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(_memory_tracker_always_throw_logical_error_on_allocation))
{
_memory_tracker_always_throw_logical_error_on_allocation = false;
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
}
#endif
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
{

View File

@ -5,6 +5,28 @@
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
#endif
/// The DENY_ALLOCATIONS_IN_SCOPE macro makes MemoryTracker throw LOGICAL_ERROR on any allocation attempt
/// until the end of the scope. It's useful to ensure that no allocations happen in signal handlers and
/// outside of the try/catch block of thread functions. ALLOW_ALLOCATIONS_IN_SCOPE cancels the effect of
/// DENY_ALLOCATIONS_IN_SCOPE in the inner scope. In Release builds these macros do nothing.
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
#include <ext/scope_guard.h>
extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation;
#define ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) \
bool _allocations_flag_prev_val##n = _memory_tracker_always_throw_logical_error_on_allocation; \
_memory_tracker_always_throw_logical_error_on_allocation = val; \
SCOPE_EXIT({ _memory_tracker_always_throw_logical_error_on_allocation = _allocations_flag_prev_val##n; })
#define ALLOCATIONS_IN_SCOPE_IMPL(n, val) ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val)
#define DENY_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, true)
#define ALLOW_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, false)
#else
#define DENY_ALLOCATIONS_IN_SCOPE static_assert(true)
#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true)
#endif
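For illustration, a minimal self-contained sketch of the same scope-guard idea (simplified: the real macros above use `__LINE__` concatenation and `SCOPE_EXIT` from `ext/scope_guard.h`, and the flag is checked inside `MemoryTracker::alloc`):

``` cpp
#include <cstdio>

thread_local bool deny_allocations = false;

/// RAII guard: set the thread-local flag, restore the previous value on scope exit.
struct AllocationGuard
{
    bool prev;
    explicit AllocationGuard(bool deny) : prev(deny_allocations) { deny_allocations = deny; }
    ~AllocationGuard() { deny_allocations = prev; }
};

void handler()
{
    AllocationGuard deny{true};        /// like DENY_ALLOCATIONS_IN_SCOPE
    {
        AllocationGuard allow{false};  /// like ALLOW_ALLOCATIONS_IN_SCOPE in an inner scope
        std::printf("allocations allowed here: %d\n", !deny_allocations);
    }
    /// a hooked allocator would check the flag here and throw in debug builds
    std::printf("allocations allowed here: %d\n", !deny_allocations);
}

int main() { handler(); }
```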
/** Tracks memory consumption.
* It throws an exception if amount of consumed memory become greater than certain limit.

View File

@ -181,6 +181,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::Real, sig, info, context);
}
@ -190,6 +191,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)
void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::CPU, sig, info, context);
}

View File

@ -197,6 +197,7 @@ static void injection(
void ThreadFuzzer::signalHandler(int)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno;
auto & fuzzer = ThreadFuzzer::instance();

View File

@ -208,6 +208,7 @@ size_t ThreadPoolImpl<Thread>::active() const
template <typename Thread>
void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
{
DENY_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);
@ -223,7 +224,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
if (!jobs.empty())
{
job = std::move(jobs.top().job);
/// std::priority_queue does not provide an interface for getting a non-const reference to an element,
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
jobs.pop();
}
else
@ -237,6 +240,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
{
try
{
ALLOW_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_active_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);

View File

@ -65,6 +65,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size());
bool dns_error = false;
for (auto & host_string : hosts_strings)
{
try
@ -76,14 +77,27 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
{
/// Most likely it's a misconfiguration, and a wrong hostname was specified
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
/// Most likely DNS is not available now
dns_error = true;
LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText());
}
}
if (nodes.empty())
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
{
/// For DNS errors we throw an exception with the ZCONNECTIONLOSS code, so it will be considered a hardware error, not a user error
if (dns_error)
throw KeeperException("Cannot resolve any of provided ZooKeeper hosts due to DNS error", Coordination::Error::ZCONNECTIONLOSS);
else
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
}
impl = std::make_unique<Coordination::ZooKeeper>(
nodes,

View File

@ -38,7 +38,15 @@ UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size)
UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
size_t compressed_size = ZSTD_compress(dest, ZSTD_compressBound(source_size), source, source_size, level);
ZSTD_CCtx * cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
if (enable_long_range)
{
ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log); // NB zero window_log means "use default" for libzstd
}
size_t compressed_size = ZSTD_compress2(cctx, dest, ZSTD_compressBound(source_size), source, source_size);
ZSTD_freeCCtx(cctx);
if (ZSTD_isError(compressed_size))
throw Exception("Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)), ErrorCodes::CANNOT_COMPRESS);
@ -55,8 +63,13 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s
throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS);
}
CompressionCodecZSTD::CompressionCodecZSTD(int level_)
: level(level_)
CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_)
{
setCodecDescription(
"ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level)), std::make_shared<ASTLiteral>(static_cast<UInt64>(window_log))});
}
CompressionCodecZSTD::CompressionCodecZSTD(int level_) : level(level_), enable_long_range(false), window_log(0)
{
setCodecDescription("ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}
@ -64,13 +77,14 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_)
void registerCodecZSTD(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::ZSTD);
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr
{
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr {
int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL;
if (arguments && !arguments->children.empty())
{
if (arguments->children.size() > 1)
throw Exception("ZSTD codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
if (arguments->children.size() > 2)
throw Exception(
"ZSTD codec must have 1 or 2 parameters, given " + std::to_string(arguments->children.size()),
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
const auto children = arguments->children;
const auto * literal = children[0]->as<ASTLiteral>();
@ -79,9 +93,32 @@ void registerCodecZSTD(CompressionCodecFactory & factory)
level = literal->value.safeGet<UInt64>();
if (level > ZSTD_maxCLevel())
throw Exception("ZSTD codec can't have level more that " + toString(ZSTD_maxCLevel()) + ", given " + toString(level), ErrorCodes::ILLEGAL_CODEC_PARAMETER);
}
throw Exception(
"ZSTD codec can't have level more than " + toString(ZSTD_maxCLevel()) + ", given " + toString(level),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
if (arguments->children.size() > 1)
{
const auto * window_literal = children[1]->as<ASTLiteral>();
if (!window_literal)
throw Exception("ZSTD codec second argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
const int window_log = window_literal->value.safeGet<UInt64>();
ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
if (ZSTD_isError(window_log_bounds.error))
throw Exception(
"ZSTD windowLog parameter is not supported " + std::string(ZSTD_getErrorName(window_log_bounds.error)),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
// 0 means "use default" for libzstd
if (window_log != 0 && (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound))
throw Exception(
"ZSTD codec can't have window log more than " + toString(window_log_bounds.upperBound) + " and lower than "
+ toString(window_log_bounds.lowerBound) + ", given " + toString(window_log),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
return std::make_shared<CompressionCodecZSTD>(level, window_log);
}
}
return std::make_shared<CompressionCodecZSTD>(level);
});
}
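The bounds validation performed during codec registration, as a standalone sketch (ZSTD_cParam_getBounds is the real libzstd call; the sample value 24 mirrors the default window log constant in the codec header below):

#include <zstd.h>
#include <cstdio>

int main()
{
    ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
    if (ZSTD_isError(bounds.error))
    {
        std::fprintf(stderr, "windowLog is not supported: %s\n", ZSTD_getErrorName(bounds.error));
        return 1;
    }

    int window_log = 24;
    /// 0 means "use libzstd's default"; any other value must fall within the advertised bounds.
    if (window_log != 0 && (window_log < bounds.lowerBound || window_log > bounds.upperBound))
    {
        std::fprintf(stderr, "windowLog %d out of [%d, %d]\n", window_log, bounds.lowerBound, bounds.upperBound);
        return 1;
    }
    std::printf("windowLog %d accepted (bounds [%d, %d])\n", window_log, bounds.lowerBound, bounds.upperBound);
}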

View File

@ -12,9 +12,12 @@ class CompressionCodecZSTD : public ICompressionCodec
{
public:
static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;
CompressionCodecZSTD(int level_);
CompressionCodecZSTD(int level_, int window_log);
uint8_t getMethodByte() const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
@ -32,6 +35,8 @@ protected:
private:
const int level;
const bool enable_long_range;
const int window_log;
};
}

View File

@ -371,8 +371,9 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \

View File

@ -61,9 +61,12 @@ Block ColumnGathererStream::readImpl()
MutableColumnPtr output_column = column.column->cloneEmpty();
output_block = Block{column.cloneEmpty()};
/// Surprisingly this call may directly change output_block, bypassing
/// output_column. See ColumnGathererStream::gather.
output_column->gather(*this);
if (!output_column->empty())
output_block.getByPosition(0).column = std::move(output_column);
return output_block;
}

View File

@ -6,6 +6,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
@ -14,6 +15,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
// TODO include this last because of a broken roaring header. See the comment
// inside.
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>
@ -282,18 +284,16 @@ public:
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type2))
throw Exception(
"Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
for (size_t i = 1; i < 3; ++i)
{
WhichDataType which(arguments[i].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
{
throw Exception(
"The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but one of them has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
return arguments[0];
}
@ -327,13 +327,23 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1, * container2;
const PaddedPODArray<UInt64> * container1, * container2;
ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
column_ptrs[i] = arguments[i].column.get();
if (i > 0)
{
column_holder[i - 1] = castColumn(arguments[i], std::make_shared<DataTypeUInt64>());
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
if (is_column_const[0])
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());
else
@ -341,13 +351,13 @@ private:
container0 = &col_agg_func->getData();
if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();
if (is_column_const[2])
container2 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
container2 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
else
container2 = &typeid_cast<const ColumnUInt32*>(column_ptrs[2])->getData();
container2 = &typeid_cast<const ColumnUInt64*>(column_ptrs[2])->getData();
auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);
@ -357,8 +367,8 @@ private:
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T>*>(data_ptr_0);
const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bitmap_data_2
@ -374,7 +384,7 @@ struct BitmapSubsetInRangeImpl
public:
static constexpr auto name = "bitmapSubsetInRange";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
}
@ -385,7 +395,7 @@ struct BitmapSubsetLimitImpl
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
}
@ -418,14 +428,14 @@ public:
for (size_t i = 0; i < 2; ++i)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[i + 1].get());
String msg(i == 0 ? "Second" : "Third");
msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + ".";
String msg = "The second and third arguments for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but one of them has type " + arguments[i + 1]->getName() + ".";
if (!array_type)
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto nested_type = array_type->getNestedType();
WhichDataType which(nested_type);
if (!which.isUInt32())
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
return arguments[0];
@ -461,13 +471,26 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const ColumnArray * array;
const ColumnArray * array1;
const ColumnArray * array2;
ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
column_ptrs[i] = arguments[i].column.get();
if (i > 0)
{
auto array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
column_holder[i - 1] = castColumn(arguments[i], array_type);
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
if (is_column_const[0])
{
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());
@ -479,21 +502,20 @@ private:
container0 = &col_agg_func->getData();
if (is_column_const[1])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
array1 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
else
{
array = typeid_cast<const ColumnArray *>(arguments[1].column.get());
}
const ColumnArray::Offsets & from_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & from_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
array1 = typeid_cast<const ColumnArray *>(column_ptrs[1]);
const ColumnArray::Offsets & from_offsets = array1->getOffsets();
const ColumnVector<UInt64>::Container & from_container = typeid_cast<const ColumnVector<UInt64> *>(&array1->getData())->getData();
if (is_column_const[2])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
array2 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
else
array = typeid_cast<const ColumnArray *>(arguments[2].column.get());
array2 = typeid_cast<const ColumnArray *>(column_ptrs[2]);
const ColumnArray::Offsets & to_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & to_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
const ColumnArray::Offsets & to_offsets = array2->getOffsets();
const ColumnVector<UInt64>::Container & to_container = typeid_cast<const ColumnVector<UInt64> *>(&array2->getData())->getData();
auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);
@ -526,6 +548,7 @@ private:
to_start = i == 0 ? 0 : to_offsets[i - 1];
to_end = to_offsets[i];
}
if (from_end - from_start != to_end - to_start)
throw Exception("From array size and to array size mismatch", ErrorCodes::LOGICAL_ERROR);
@ -724,10 +747,11 @@ public:
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))
WhichDataType which(arguments[1].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<UInt8>>();
@ -765,27 +789,32 @@ private:
{
const IColumn * column_ptrs[2];
bool is_column_const[2];
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1;
for (size_t i = 0; i < 2; ++i)
{
column_ptrs[i] = arguments[i].column.get();
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt64> * container1;
column_ptrs[0] = arguments[0].column.get();
is_column_const[0] = isColumnConst(*column_ptrs[0]);
if (is_column_const[0])
container0 = &typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get())->getData();
else
container0 = &typeid_cast<const ColumnAggregateFunction*>(column_ptrs[0])->getData();
// we can always cast the second column to ColumnUInt64
auto uint64_column = castColumn(arguments[1], std::make_shared<DataTypeUInt64>());
column_ptrs[1] = uint64_column.get();
is_column_const[1] = isColumnConst(*column_ptrs[1]);
if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const UInt32 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(data_ptr_0);
vec_to[i] = bitmap_data_0.rbs.rb_contains(data1);
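The cast-once idea in isolation, a hypothetical plain-C++ sketch: normalize every small unsigned type to UInt64 up front so the hot loop has a single code path (std::variant stands in for ClickHouse's column polymorphism):

#include <cstdint>
#include <iostream>
#include <variant>
#include <vector>

using AnyUnsignedColumn = std::variant<
    std::vector<uint8_t>, std::vector<uint16_t>, std::vector<uint32_t>, std::vector<uint64_t>>;

/// Widen whatever arrived to a single UInt64 container, as castColumn does above.
std::vector<uint64_t> castToUInt64(const AnyUnsignedColumn & col)
{
    return std::visit([](const auto & v) { return std::vector<uint64_t>(v.begin(), v.end()); }, col);
}

int main()
{
    AnyUnsignedColumn col = std::vector<uint16_t>{1, 300, 65535};
    for (uint64_t x : castToUInt64(col))
        std::cout << x << ' ';
    std::cout << '\n';
}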

View File

@ -2263,7 +2263,7 @@ private:
template <typename ToDataType>
std::enable_if_t<IsDataTypeDecimal<ToDataType>, WrapperType>
createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type) const
createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const
{
TypeIndex type_index = from_type->getTypeId();
UInt32 scale = to_type->getScale();
@ -2282,11 +2282,12 @@ private:
auto wrapper_cast_type = cast_type;
return [wrapper_cast_type, type_index, scale, to_type]
return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
{
ColumnPtr result_column;
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool {
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
using RightDataType = typename Types::RightType;
@ -2312,6 +2313,19 @@ private:
return true;
}
}
else if constexpr (std::is_same_v<LeftDataType, DataTypeString>)
{
if (requested_result_is_nullable)
{
/// Consistent with CAST(Nullable(String) AS Nullable(Numbers))
/// In case of converting to a Nullable type, we apply a different parsing rule
/// that does not throw an exception but returns NULL for malformed input.
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast, ConvertReturnNullOnErrorTag>::execute(
arguments, result_type, input_rows_count, scale);
return true;
}
}
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(arguments, result_type, input_rows_count, scale);
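The parse-or-NULL rule in isolation, a hypothetical sketch (std::from_chars stands in for the ConvertReturnNullOnErrorTag conversion path above):

#include <charconv>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string_view>

/// CAST(s AS Nullable(Int64)) semantics: malformed input yields NULL instead of throwing.
std::optional<int64_t> toInt64OrNull(std::string_view s)
{
    int64_t value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec != std::errc() || ptr != s.data() + s.size())
        return std::nullopt;
    return value;
}

int main()
{
    std::cout << toInt64OrNull("123").value_or(-1) << '\n';       /// 123
    std::cout << (toInt64OrNull("12x") ? "ok" : "NULL") << '\n';  /// NULL
}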
@ -2929,7 +2943,7 @@ private:
std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>> ||
std::is_same_v<ToDataType, DataTypeDateTime64>)
{
ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()));
ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
return true;
}

View File

@ -0,0 +1,238 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <common/find_symbols.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
struct DecodeXMLComponentName
{
static constexpr auto name = "decodeXMLComponent";
};
class FunctionDecodeXMLComponentImpl
{
public:
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
/// The size of the result is always not more than the size of the source,
/// because entities decode to shorter byte sequences.
/// Example: &#xx... will decode to a UTF-8 byte sequence not longer than 4 bytes.
res_data.resize(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
size_t prev_offset = 0;
size_t res_offset = 0;
for (size_t i = 0; i < size; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
size_t dst_size = execute(src_data, src_size, reinterpret_cast<char *>(res_data.data() + res_offset));
res_offset += dst_size;
res_offsets[i] = res_offset;
prev_offset = offsets[i];
}
res_data.resize(res_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception("Function decodeXMLComponent cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static const int max_legal_unicode_value = 0x10FFFF;
static const int max_decimal_length_of_unicode_point = 7; /// 1114111
static size_t execute(const char * src, size_t src_size, char * dst)
{
const char * src_pos = src;
const char * src_end = src + src_size;
char * dst_pos = dst;
while (true)
{
const char * entity_pos = find_first_symbols<'&'>(src_pos, src_end);
if (entity_pos + strlen("lt;") >= src_end)
break;
/// Copy text between entities.
size_t bytes_to_copy = entity_pos - src_pos;
memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy);
dst_pos += bytes_to_copy;
src_pos = entity_pos;
++entity_pos;
const char * entity_end = find_first_symbols<';'>(entity_pos, src_end);
if (entity_end == src_end)
break;
bool parsed = false;
/// &#NNNN; or &#xNNNN;
uint32_t code_point = 0;
if (isValidNumericEntity(entity_pos, entity_end, code_point))
{
codePointToUTF8(code_point, dst_pos);
parsed = true;
}
else if (entity_end - entity_pos == 2)
{
if (memcmp(entity_pos, "lt", 2) == 0)
{
*dst_pos = '<';
++dst_pos;
parsed = true;
}
else if (memcmp(entity_pos, "gt", 2) == 0)
{
*dst_pos = '>';
++dst_pos;
parsed = true;
}
}
else if (entity_end - entity_pos == 3)
{
if (memcmp(entity_pos, "amp", 3) == 0)
{
*dst_pos = '&';
++dst_pos;
parsed = true;
}
}
else if (entity_end - entity_pos == 4)
{
if (memcmp(entity_pos, "quot", 4) == 0)
{
*dst_pos = '"';
++dst_pos;
parsed = true;
}
else if (memcmp(entity_pos, "apos", 4) == 0)
{
*dst_pos = '\'';
++dst_pos;
parsed = true;
}
}
if (parsed)
{
/// Skip the parsed entity.
src_pos = entity_end + 1;
}
else
{
/// Copy one byte as is and skip it.
*dst_pos = *src_pos;
++dst_pos;
++src_pos;
}
}
/// Copy the rest of the string.
if (src_pos < src_end)
{
size_t bytes_to_copy = src_end - src_pos;
memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy);
dst_pos += bytes_to_copy;
}
return dst_pos - dst;
}
static void codePointToUTF8(uint32_t code_point, char *& dst_pos)
{
if (code_point < (1 << 7))
{
dst_pos[0] = (code_point & 0x7F);
++dst_pos;
}
else if (code_point < (1 << 11))
{
dst_pos[0] = ((code_point >> 6) & 0x1F) + 0xC0;
dst_pos[1] = (code_point & 0x3F) + 0x80;
dst_pos += 2;
}
else if (code_point < (1 << 16))
{
dst_pos[0] = ((code_point >> 12) & 0x0F) + 0xE0;
dst_pos[1] = ((code_point >> 6) & 0x3F) + 0x80;
dst_pos[2] = (code_point & 0x3F) + 0x80;
dst_pos += 3;
}
else
{
dst_pos[0] = ((code_point >> 18) & 0x07) + 0xF0;
dst_pos[1] = ((code_point >> 12) & 0x3F) + 0x80;
dst_pos[2] = ((code_point >> 6) & 0x3F) + 0x80;
dst_pos[3] = (code_point & 0x3F) + 0x80;
dst_pos += 4;
}
}
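A quick standalone spot check (a hypothetical test, not part of the patch) that the branchy encoder above matches the UTF-8 byte layout for a sample code point:

#include <cassert>
#include <cstdint>
#include <cstring>

/// Same encoding scheme as codePointToUTF8 above, extracted for a self-contained check.
static size_t encode(uint32_t cp, char * out)
{
    if (cp < (1u << 7)) { out[0] = static_cast<char>(cp); return 1; }
    if (cp < (1u << 11))
    {
        out[0] = static_cast<char>(0xC0 | (cp >> 6));
        out[1] = static_cast<char>(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp < (1u << 16))
    {
        out[0] = static_cast<char>(0xE0 | (cp >> 12));
        out[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
        out[2] = static_cast<char>(0x80 | (cp & 0x3F));
        return 3;
    }
    out[0] = static_cast<char>(0xF0 | (cp >> 18));
    out[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    out[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out[3] = static_cast<char>(0x80 | (cp & 0x3F));
    return 4;
}

int main()
{
    char buf[4];
    size_t n = encode(0x4E2D, buf);  /// U+4E2D, the code point behind &#x4E2D;
    assert(n == 3 && std::memcmp(buf, "\xE4\xB8\xAD", 3) == 0);
}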
static bool isValidNumericEntity(const char * src, const char * end, uint32_t & code_point)
{
if (src + strlen("#") >= end)
return false;
if (src[0] != '#' || (end - src > 1 + max_decimal_length_of_unicode_point))
return false;
if (src + 2 < end && (src[1] == 'x' || src[1] == 'X'))
{
src += 2;
for (; src < end; ++src)
{
if (!isHexDigit(*src))
return false;
code_point *= 16;
code_point += unhex(*src);
}
}
else
{
src += 1;
for (; src < end; ++src)
{
if (!isNumericASCII(*src))
return false;
code_point *= 10;
code_point += *src - '0';
}
}
return code_point <= max_legal_unicode_value;
}
};
using FunctionDecodeXMLComponent = FunctionStringToString<FunctionDecodeXMLComponentImpl, DecodeXMLComponentName>;
}
void registerFunctionDecodeXMLComponent(FunctionFactory & factory)
{
factory.registerFunction<FunctionDecodeXMLComponent>();
}
}

View File

@ -30,6 +30,10 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
/// We should never return a LowCardinality result, because we declare that the result is always constant zero
/// (in getResultIfAlwaysReturnsConstantAndHasArguments).
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
String getName() const override
{
return name;

View File

@ -34,6 +34,8 @@ void registerFunctionNormalizeQuery(FunctionFactory &);
void registerFunctionNormalizedQueryHash(FunctionFactory &);
void registerFunctionCountMatches(FunctionFactory &);
void registerFunctionEncodeXMLComponent(FunctionFactory & factory);
void registerFunctionDecodeXMLComponent(FunctionFactory & factory);
#if USE_BASE64
void registerFunctionBase64Encode(FunctionFactory &);
@ -70,6 +72,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionNormalizedQueryHash(factory);
registerFunctionCountMatches(factory);
registerFunctionEncodeXMLComponent(factory);
registerFunctionDecodeXMLComponent(factory);
#if USE_BASE64
registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory);

View File

@ -11,205 +11,144 @@ namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename A, typename B>
struct TupleHammingDistanceImpl
{
using ResultType = UInt8;
static void NO_INLINE vectorVector(
const PaddedPODArray<A> & a1,
const PaddedPODArray<A> & b1,
const PaddedPODArray<B> & a2,
const PaddedPODArray<B> & b2,
PaddedPODArray<ResultType> & c)
{
size_t size = a1.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1[i], a2[i]) + apply(b1[i], b2[i]);
}
static void NO_INLINE
vectorConstant(const PaddedPODArray<A> & a1, const PaddedPODArray<A> & b1, UInt64 a2, UInt64 b2, PaddedPODArray<ResultType> & c)
{
size_t size = a1.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1[i], a2) + apply(b1[i], b2);
}
static void NO_INLINE
constantVector(UInt64 a1, UInt64 b1, const PaddedPODArray<B> & a2, const PaddedPODArray<B> & b2, PaddedPODArray<ResultType> & c)
{
size_t size = a2.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1, a2[i]) + apply(b1, b2[i]);
}
static ResultType constantConstant(UInt64 a1, UInt64 b1, UInt64 a2, UInt64 b2) { return apply(a1, a2) + apply(b1, b2); }
private:
static inline UInt8 apply(UInt64 a, UInt64 b) { return a != b; }
};
template <typename F>
bool castType(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeInt8,
DataTypeInt16,
DataTypeInt32,
DataTypeInt64,
DataTypeUInt8,
DataTypeUInt16,
DataTypeUInt32,
DataTypeUInt64>(type, std::forward<F>(f));
}
template <typename F>
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
{
return castType(left, [&](const auto & left_) { return castType(right, [&](const auto & right_) { return f(left_, right_); }); });
}
// tupleHammingDistance function: (Tuple(Integer, Integer), Tuple(Integer, Integer))->0/1/2
// in order to avoid code bloating, for non-constant tuple, we make sure that the elements
// in the tuple should have same data type, and for constant tuple, elements can be any integer
// data type, we cast all of them into UInt64
/// tupleHammingDistance function: (Tuple(...), Tuple(...)) -> N
/// Returns the number of non-equal tuple elements.
class FunctionTupleHammingDistance : public IFunction
{
private:
const Context & context;
public:
static constexpr auto name = "tupleHammingDistance";
using ResultType = UInt8;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionTupleHammingDistance>(); }
explicit FunctionTupleHammingDistance(const Context & context_) : context(context_) {}
static FunctionPtr create(const Context & context) { return std::make_shared<FunctionTupleHammingDistance>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
bool useDefaultImplementationForConstants() const override { return true; }
Columns getTupleElements(const IColumn & column) const
{
if (!isTuple(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isTuple(arguments[1]))
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeUInt8>();
if (const auto * const_column = typeid_cast<const ColumnConst *>(&column))
return convertConstTupleToConstantElements(*const_column);
if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(&column))
{
Columns columns(column_tuple->tupleSize());
for (size_t i = 0; i < columns.size(); ++i)
columns[i] = column_tuple->getColumnPtr(i);
return columns;
}
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} should be tuples, got {}",
getName(), column.getName());
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());
if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);
if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());
size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();
auto compare = FunctionFactory::instance().get("notEquals", context);
auto plus = FunctionFactory::instance().get("plus", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_compare = compare->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_compare->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}
auto res_type = types[0];
for (size_t i = 1; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{res_type, {}};
ColumnWithTypeAndName right{types[i], {}};
auto plus_elem = plus->build({left, right});
res_type = plus_elem->getResultType();
}
return res_type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & arg1 = arguments[0];
const ColumnWithTypeAndName & arg2 = arguments[1];
const DataTypeTuple & type1 = static_cast<const DataTypeTuple &>(*arg1.type);
const DataTypeTuple & type2 = static_cast<const DataTypeTuple &>(*arg2.type);
const auto & left_elems = type1.getElements();
const auto & right_elems = type2.getElements();
if (left_elems.size() != 2 || right_elems.size() != 2)
throw Exception(
"Illegal column of arguments of function " + getName() + ", tuple should have exactly two elements.",
ErrorCodes::ILLEGAL_COLUMN);
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);
ColumnPtr result_column;
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);
bool valid = castBothTypes(left_elems[0].get(), right_elems[0].get(), [&](const auto & left, const auto & right)
auto compare = FunctionFactory::instance().get("notEquals", context);
auto plus = FunctionFactory::instance().get("plus", context);
ColumnsWithTypeAndName columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
using LeftDataType = std::decay_t<decltype(left)>;
using RightDataType = std::decay_t<decltype(right)>;
using T0 = typename LeftDataType::FieldType;
using T1 = typename RightDataType::FieldType;
using ColVecT0 = ColumnVector<T0>;
using ColVecT1 = ColumnVector<T1>;
using ColVecResult = ColumnVector<ResultType>;
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_compare = compare->build(ColumnsWithTypeAndName{left, right});
columns[i].type = elem_compare->getResultType();
columns[i].column = elem_compare->execute({left, right}, columns[i].type, input_rows_count);
}
using OpImpl = TupleHammingDistanceImpl<T0, T1>;
auto res = columns[0];
for (size_t i = 1; i < tuple_size; ++i)
{
auto plus_elem = plus->build({res, columns[i]});
auto res_type = plus_elem->getResultType();
res.column = plus_elem->execute({res, columns[i]}, res_type, input_rows_count);
res.type = res_type;
}
// we can not useDefaultImplementationForConstants,
// because with that, tupleHammingDistance((10, 300), (10, 20)) does not work,
// since 10 has data type UInt8, and 300 has data type UInt16
if (const ColumnConst * const_col_left = checkAndGetColumnConst<ColumnTuple>(arg1.column.get()))
{
if (const ColumnConst * const_col_right = checkAndGetColumnConst<ColumnTuple>(arg2.column.get()))
{
auto cols1 = convertConstTupleToConstantElements(*const_col_left);
auto cols2 = convertConstTupleToConstantElements(*const_col_right);
Field a1, b1, a2, b2;
cols1[0]->get(0, a1);
cols1[1]->get(0, b1);
cols2[0]->get(0, a2);
cols2[1]->get(0, b2);
auto res = OpImpl::constantConstant(a1.get<UInt64>(), b1.get<UInt64>(), a2.get<UInt64>(), b2.get<UInt64>());
result_column = DataTypeUInt8().createColumnConst(const_col_left->size(), toField(res));
return true;
}
}
typename ColVecResult::MutablePtr col_res = nullptr;
col_res = ColVecResult::create();
auto & vec_res = col_res->getData();
vec_res.resize(input_rows_count);
// constant tuple - non-constant tuple
if (const ColumnConst * const_col_left = checkAndGetColumnConst<ColumnTuple>(arg1.column.get()))
{
if (const ColumnTuple * col_right = typeid_cast<const ColumnTuple *>(arg2.column.get()))
{
auto const_cols = convertConstTupleToConstantElements(*const_col_left);
Field a1, b1;
const_cols[0]->get(0, a1);
const_cols[1]->get(0, b1);
auto col_r1 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(0));
auto col_r2 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(1));
if (col_r1 && col_r2)
OpImpl::constantVector(a1.get<UInt64>(), b1.get<UInt64>(), col_r1->getData(), col_r2->getData(), vec_res);
else
return false;
}
else
return false;
}
else if (const ColumnTuple * col_left = typeid_cast<const ColumnTuple *>(arg1.column.get()))
{
auto col_l1 = checkAndGetColumn<ColVecT0>(&col_left->getColumn(0));
auto col_l2 = checkAndGetColumn<ColVecT0>(&col_left->getColumn(1));
if (col_l1 && col_l2)
{
// non-constant tuple - constant tuple
if (const ColumnConst * const_col_right = checkAndGetColumnConst<ColumnTuple>(arg2.column.get()))
{
auto const_cols = convertConstTupleToConstantElements(*const_col_right);
Field a2, b2;
const_cols[0]->get(0, a2);
const_cols[1]->get(0, b2);
OpImpl::vectorConstant(col_l1->getData(), col_l2->getData(), a2.get<UInt64>(), a2.get<UInt64>(), vec_res);
}
// non-constant tuple - non-constant tuple
else if (const ColumnTuple * col_right = typeid_cast<const ColumnTuple *>(arg2.column.get()))
{
auto col_r1 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(0));
auto col_r2 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(1));
if (col_r1 && col_r2)
OpImpl::vectorVector(col_l1->getData(), col_l2->getData(), col_r1->getData(), col_r2->getData(), vec_res);
else
return false;
}
else
return false;
}
else
return false;
}
else
return false;
result_column = std::move(col_res);
return true;
});
if (!valid)
throw Exception(getName() + "'s arguments do not match the expected data types", ErrorCodes::ILLEGAL_COLUMN);
return result_column;
return res.column;
}
};
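For intuition, a hypothetical plain-C++ rendering of the new semantics: the distance is the count of pairwise non-equal elements, with no restriction to two same-typed integer elements:

#include <cstddef>
#include <iostream>
#include <tuple>
#include <utility>

template <typename... A, typename... B, std::size_t... I>
std::size_t tupleHammingDistanceImpl(const std::tuple<A...> & a, const std::tuple<B...> & b, std::index_sequence<I...>)
{
    /// Sum a 0/1 indicator per position, mirroring the notEquals-then-plus chain above.
    return ((std::get<I>(a) != std::get<I>(b) ? 1 : 0) + ... + 0);
}

template <typename... A, typename... B>
std::size_t tupleHammingDistance(const std::tuple<A...> & a, const std::tuple<B...> & b)
{
    static_assert(sizeof...(A) == sizeof...(B), "tuples must have the same size");
    return tupleHammingDistanceImpl(a, b, std::index_sequence_for<A...>{});
}

int main()
{
    /// The case the old implementation struggled with: mixed-width literals.
    std::cout << tupleHammingDistance(std::make_tuple(10, 300), std::make_tuple(10, 20)) << '\n';  /// 1
}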

View File

@ -221,6 +221,7 @@ SRCS(
currentUser.cpp
dateDiff.cpp
date_trunc.cpp
decodeXMLComponent.cpp
decrypt.cpp
defaultValueOfArgumentType.cpp
defaultValueOfTypeName.cpp

View File

@ -61,7 +61,11 @@ template <typename T> WriteBuffer & operator<< (QuoteManipWriteBuffer buf,
template <typename T> WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const T & x) { writeDoubleQuoted(x, buf.get()); return buf; }
template <typename T> WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const T & x) { writeBinary(x, buf.get()); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeAnyEscapedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const std::string_view & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const StringRef & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeEscapedString(x, strlen(x), buf); return buf; }
inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'"'>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const char * x) { writeStringBinary(x, buf.get()); return buf; }

View File

@ -4,13 +4,11 @@ namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
extern const int LOGICAL_ERROR;
}
PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= DBMS_DEFAULT_BUFFER_SIZE*/,
size_t unread_limit_ /* = default_limit*/)
: BufferWithOwnMemory(start_size_), sub_buf(sub_buf_), unread_limit(unread_limit_)
PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= DBMS_DEFAULT_BUFFER_SIZE*/)
: BufferWithOwnMemory(start_size_), sub_buf(sub_buf_)
{
padded &= sub_buf.isPadded();
/// Read from sub-buffer
@ -191,8 +189,6 @@ void PeekableReadBuffer::checkStateCorrect() const
}
if (currentlyReadFromOwnMemory() && !peeked_size)
throw DB::Exception("Pos in empty own buffer", ErrorCodes::LOGICAL_ERROR);
if (unread_limit < memory.size())
throw DB::Exception("Size limit exceed", ErrorCodes::LOGICAL_ERROR);
}
void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
@ -222,16 +218,11 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
}
else
{
if (unread_limit < new_size)
throw DB::Exception("PeekableReadBuffer: Memory limit exceed", ErrorCodes::MEMORY_LIMIT_EXCEEDED);
size_t pos_offset = pos - memory.data();
size_t new_size_amortized = memory.size() * 2;
if (new_size_amortized < new_size)
new_size_amortized = new_size;
else if (unread_limit < new_size_amortized)
new_size_amortized = unread_limit;
memory.resize(new_size_amortized);
if (need_update_checkpoint)
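The growth policy above in miniature (a hedged sketch, not the actual buffer code): double the buffer, but jump straight to the requested size when doubling is not enough; the old unread_limit cap is gone:

#include <cassert>
#include <cstddef>

std::size_t nextCapacity(std::size_t current, std::size_t needed)
{
    std::size_t amortized = current * 2;          /// amortized O(1) appends
    return amortized < needed ? needed : amortized;
}

int main()
{
    assert(nextCapacity(1024, 1100) == 2048);     /// doubling already covers the need
    assert(nextCapacity(1024, 5000) == 5000);     /// otherwise grow exactly to the need
}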

View File

@ -20,8 +20,7 @@ class PeekableReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
friend class PeekableReadBufferCheckpoint;
public:
explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = DBMS_DEFAULT_BUFFER_SIZE,
size_t unread_limit_ = 16 * DBMS_DEFAULT_BUFFER_SIZE);
explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = DBMS_DEFAULT_BUFFER_SIZE);
~PeekableReadBuffer() override;
@ -95,7 +94,6 @@ private:
ReadBuffer & sub_buf;
const size_t unread_limit;
size_t peeked_size = 0;
Position checkpoint = nullptr;
bool checkpoint_in_own_memory = false;

View File

@ -483,6 +483,10 @@ inline void writeEscapedString(const StringRef & ref, WriteBuffer & buf)
writeEscapedString(ref.data, ref.size, buf);
}
inline void writeEscapedString(const std::string_view & ref, WriteBuffer & buf)
{
writeEscapedString(ref.data(), ref.size(), buf);
}
template <char quote_character>
void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & buf)
@ -512,17 +516,31 @@ inline void writeQuotedString(const String & s, WriteBuffer & buf)
writeAnyQuotedString<'\''>(s, buf);
}
inline void writeQuotedString(const StringRef & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref, buf);
}
inline void writeQuotedString(const std::string_view & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref.data(), ref.data() + ref.size(), buf);
}
inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}
inline void writeDoubleQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}
inline void writeDoubleQuotedString(const std::string_view & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s.data(), s.data() + s.size(), buf);
}
/// Outputs a string in backquotes.
inline void writeBackQuotedString(const StringRef & s, WriteBuffer & buf)
{
@ -901,6 +919,7 @@ writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const std::string_view & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const DummyUInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
@ -1001,6 +1020,10 @@ writeQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }
inline void writeQuoted(const String & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const std::string_view & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const StringRef & x, WriteBuffer & buf) { writeQuotedString(x, buf); }
inline void writeQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('\'', buf);
@ -1043,6 +1066,10 @@ writeDoubleQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }
inline void writeDoubleQuoted(const String & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const std::string_view & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const StringRef & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }
inline void writeDoubleQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('"', buf);

View File

@ -0,0 +1,82 @@
#include <gtest/gtest.h>
#include <string>
#include <type_traits>
#include <common/StringRef.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
using namespace DB;
template <typename T, typename U>
void checkString(const T & str, U manip, const std::string & expected)
{
WriteBufferFromOwnString buf;
buf << manip << str;
EXPECT_EQ(expected, buf.str()) << "str type:" << typeid(str).name();
}
TEST(OperatorsManipTest, EscapingTest)
{
checkString("Hello 'world'", escape, "Hello \\'world\\'");
checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); // NOLINT
std::string s1 = "Hello 'world'";
checkString(s1, escape, "Hello \\'world\\'");
std::string s2 = "Hello \\world\\";
checkString(s2, escape, "Hello \\\\world\\\\"); // NOLINT
std::string_view sv1 = s1;
checkString(sv1, escape, "Hello \\'world\\'");
std::string_view sv2 = s2;
checkString(sv2, escape, "Hello \\\\world\\\\"); // NOLINT
StringRef sr1 = s1;
checkString(sr1, escape, "Hello \\'world\\'");
StringRef sr2 = s2;
checkString(sr2, escape, "Hello \\\\world\\\\"); // NOLINT
}
TEST(OperatorsManipTest, QuoteTest)
{
checkString("Hello 'world'", quote, "'Hello \\'world\\''");
std::string s1 = "Hello 'world'";
checkString(s1, quote, "'Hello \\'world\\''");
std::string_view sv1 = s1;
checkString(sv1, quote, "'Hello \\'world\\''");
StringRef sr1 = s1;
checkString(sr1, quote, "'Hello \\'world\\''");
}
TEST(OperatorsManipTest, DoubleQuoteTest)
{
checkString("Hello 'world'", double_quote, "\"Hello 'world'\"");
std::string s1 = "Hello 'world'";
checkString(s1, double_quote, "\"Hello 'world'\"");
std::string_view sv1 = s1;
checkString(sv1, double_quote, "\"Hello 'world'\"");
StringRef sr1 = s1;
checkString(sr1, double_quote, "\"Hello 'world'\"");
}
TEST(OperatorsManipTest, binary)
{
checkString("Hello", binary, "\x5Hello");
std::string s1 = "Hello";
checkString(s1, binary, "\x5Hello");
std::string_view sv1 = s1;
checkString(sv1, binary, "\x5Hello");
StringRef sr1 = s1;
checkString(sr1, binary, "\x5Hello");
}

View File

@ -9,7 +9,6 @@
namespace DB::ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int MEMORY_LIMIT_EXCEEDED;
}
static void readAndAssert(DB::ReadBuffer & buf, const char * str)
@ -40,7 +39,7 @@ try
DB::ReadBufferFromString b4(s4);
DB::ConcatReadBuffer concat({&b1, &b2, &b3, &b4});
DB::PeekableReadBuffer peekable(concat, 0, 16);
DB::PeekableReadBuffer peekable(concat, 0);
ASSERT_TRUE(!peekable.eof());
assertAvailable(peekable, "0123456789");
@ -48,6 +47,8 @@ try
DB::PeekableReadBufferCheckpoint checkpoint{peekable};
readAndAssert(peekable, "01234");
}
#ifndef ABORT_ON_LOGICAL_ERROR
bool exception = false;
try
{
@ -60,6 +61,7 @@ try
exception = true;
}
ASSERT_TRUE(exception);
#endif
assertAvailable(peekable, "56789");
readAndAssert(peekable, "56");
@ -70,19 +72,10 @@ try
peekable.dropCheckpoint();
assertAvailable(peekable, "789");
exception = false;
try
{
DB::PeekableReadBufferCheckpoint checkpoint{peekable, true};
peekable.ignore(30);
peekable.ignore(20);
}
catch (DB::Exception & e)
{
if (e.code() != DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED)
throw;
exception = true;
}
ASSERT_TRUE(exception);
assertAvailable(peekable, "789qwertyuiop");
readAndAssert(peekable, "789qwertyu");

View File

@ -436,12 +436,6 @@ void ActionsDAG::project(const NamesWithAliases & projection)
settings.projected_output = true;
}
void ActionsDAG::removeColumn(const std::string & column_name)
{
auto & node = getNode(column_name);
index.remove(&node);
}
bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
{
if (index.contains(column_name))
@ -550,6 +544,11 @@ std::string ActionsDAG::dumpDAG() const
out << "\n";
}
out << "Index:";
for (const auto * node : index)
out << ' ' << map[node];
out << '\n';
return out.str();
}
@ -698,7 +697,8 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
/// Will store merged result in `first`.
/// This map contains nodes which should be removed from the `first` index, because they are used as inputs for `second`.
std::unordered_set<Node *> removed_first_result;
/// The second element is the number of removals (because one node may be repeated several times in the result).
std::unordered_map<Node *, size_t> removed_first_result;
/// Map inputs of `second` to nodes of `first`.
std::unordered_map<Node *, Node *> inputs_map;
@ -723,7 +723,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
else
{
inputs_map[node] = it->second.front();
removed_first_result.emplace(it->second.front());
removed_first_result[it->second.front()] += 1;
it->second.pop_front();
}
}
@ -767,8 +767,12 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
auto cur = it;
++it;
if (removed_first_result.count(*cur))
auto jt = removed_first_result.find(*cur);
if (jt != removed_first_result.end() && jt->second > 0)
{
first.index.remove(cur);
--jt->second;
}
}
for (auto * node : second.index)
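The counting pattern in miniature (plain ints stand in for DAG nodes): an unordered_map used as a multiset, so that a node occurring twice in the result is removed from the index exactly twice:

#include <cassert>
#include <cstddef>
#include <unordered_map>

int main()
{
    std::unordered_map<int, std::size_t> removed;  /// node -> number of pending removals
    removed[42] += 2;                              /// node 42 occurs twice in the result

    std::size_t erased = 0;
    for (int node : {42, 42, 7})
    {
        auto jt = removed.find(node);
        if (jt != removed.end() && jt->second > 0)
        {
            ++erased;                              /// drop this occurrence from the index
            --jt->second;
        }
    }
    assert(erased == 2);                           /// both copies of 42 removed, 7 kept
}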

View File

@ -80,7 +80,7 @@ public:
};
/// Index is used to:
/// * find Node buy it's result_name
/// * find Node by its result_name
/// * specify order of columns in result
/// It represents a set of available columns.
/// Removing a column from the index is equivalent to removing the column from the final result.
@ -133,16 +133,6 @@ public:
insert(node);
}
void remove(Node * node)
{
auto it = map.find(node->result_name);
if (it != map.end())
return;
list.erase(it->second);
map.erase(it);
}
void remove(std::list<Node *>::iterator it)
{
auto map_it = map.find((*it)->result_name);
@ -219,8 +209,6 @@ public:
/// Add alias actions and remove unused columns from index. Also specify result columns order in index.
void project(const NamesWithAliases & projection);
/// Removes column from index.
void removeColumn(const std::string & column_name);
/// If column is not in index, try to find it in nodes and insert back into index.
bool tryRestoreColumn(const std::string & column_name);

View File

@ -0,0 +1,100 @@
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
namespace DB
{
bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (const auto * f = node->as<ASTFunction>())
{
/// "lambda" visits children itself.
if (f->name == "lambda")
return false;
}
return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>()
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
}
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
// If it's a select query, only replace filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
if (query->where())
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());
return;
}
if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}
if (auto * node = ast->as<ASTIdentifier>())
{
visit(*node, ast, data);
return;
}
}
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
{
/// Do not add formal parameters of the lambda expression
if (node.name == "lambda")
{
Names local_aliases;
auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node);
for (const auto & name : names_from_lambda)
{
if (data.private_aliases.insert(name).second)
{
local_aliases.push_back(name);
}
}
/// visit child with masked local aliases
Visitor(data).visit(node.arguments->children[1]);
for (const auto & name : local_aliases)
data.private_aliases.erase(name);
}
}
void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
if (auto column_name = IdentifierSemantic::getColumnName(node))
{
if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
return;
const auto & col = data.columns.get(*column_name);
if (col.default_desc.kind == ColumnDefaultKind::Alias)
{
ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);
auto str = queryToString(ast);
// revisit ast to track recursive alias columns
Visitor(data).visit(ast);
}
}
}
}

View File

@ -0,0 +1,81 @@
#pragma once
#include <Core/Names.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class IDataType;
class ASTFunction;
class ASTIdentifier;
using DataTypePtr = std::shared_ptr<const IDataType>;
/// Visits AST node to rewrite alias columns in query
/// Currently it handles only the 3 kinds of rewrites below
/// For example:
// CREATE TABLE test_table
// (
// `timestamp` DateTime,
// `value` UInt64,
// `day` Date ALIAS toDate(timestamp),
// `day1` Date ALIAS day + 1,
// `day2` Date ALIAS day1 + 1,
// `time` DateTime ALIAS timestamp
// )ENGINE = MergeTree
// PARTITION BY toYYYYMMDD(timestamp)
// ORDER BY timestamp SETTINGS index_granularity = 1;
/// 1. Rewrite the filters in a query when optimize_respect_aliases is enabled.
/// This helps with `optimize_trivial_count`, partition pruning in `KeyCondition` and secondary indexes.
/// e.g.: select max(value) from test_table where day2 = today(); the filter becomes: ((toDate(timestamp) + 1) + 1) = today().
/// 2. Alias-on-alias resolution for `required_columns` extracted in `InterpreterSelectQuery.cpp`; this helps to get all dependent physical columns for the query.
/// e.g.: select day2 from test_table. `required_columns` can be obtained from the temporarily rewritten AST `((toDate(timestamp) + 1) + 1)`.
/// 3. Help with `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`:
/// For queries with alias columns in `orderBy` and `groupBy`, these ASTs will not change.
/// Instead we generate temporary ASTs and temporary Actions to get the `InputOrderInfo`.
/// e.g.: select day1 from test_table order by day1;
class ColumnAliasesMatcher
{
public:
using Visitor = InDepthNodeVisitor<ColumnAliasesMatcher, false>;
struct Data
{
const ColumnsDescription & columns;
/// forbidden_columns are from array join, we can't rewrite alias columns involved in array join.
/// Do not analyze joined columns.
/// They may have aliases and come to description as is.
const NameSet & forbidden_columns;
const Context & context;
/// private_aliases are from lambda, so these are local names.
NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
: columns(columns_)
, forbidden_columns(forbidden_columns_)
, context(context_)
{}
};
static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data);
static void visit(ASTFunction & node, ASTPtr & ast, Data & data);
};
using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;
}

View File

@ -26,7 +26,6 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
@ -429,7 +428,6 @@ struct ContextShared
if (system_logs)
system_logs->shutdown();
TemporaryLiveViewCleaner::shutdown();
DatabaseCatalog::shutdown();
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
@ -493,7 +491,6 @@ Context Context::createGlobal(ContextShared * shared)
void Context::initGlobal()
{
DatabaseCatalog::init(*this);
TemporaryLiveViewCleaner::init(*this);
}
SharedContextHolder Context::createShared()

View File

@ -13,7 +13,6 @@
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/StorageDistributed.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
@ -21,7 +20,6 @@
#include <Interpreters/Context.h>
#include <Access/AccessRightsElement.h>
#include <Access/ContextAccess.h>
#include <Common/DNSResolver.h>
#include <Common/Macros.h>
#include <Common/setThreadName.h>
#include <Common/Stopwatch.h>
@ -34,7 +32,6 @@
#include <DataTypes/DataTypeString.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Poco/Timestamp.h>
#include <Poco/Net/NetException.h>
#include <common/sleep.h>
#include <common/getFQDNOrHostName.h>
#include <pcg_random.hpp>
@ -62,107 +59,46 @@ namespace ErrorCodes
}
String DDLLogEntry::toString()
{
    WriteBufferFromOwnString wb;

    Strings host_id_strings(hosts.size());
    std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);

    auto version = CURRENT_VERSION;
    wb << "version: " << version << "\n";
    wb << "query: " << escape << query << "\n";
    wb << "hosts: " << host_id_strings << "\n";
    wb << "initiator: " << initiator << "\n";

    return wb.str();
}

void DDLLogEntry::parse(const String & data)
{
    ReadBufferFromString rb(data);

    int version;
    rb >> "version: " >> version >> "\n";

    if (version != CURRENT_VERSION)
        throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}", version);

    Strings host_id_strings;
    rb >> "query: " >> escape >> query >> "\n";
    rb >> "hosts: " >> host_id_strings >> "\n";

    if (!rb.eof())
        rb >> "initiator: " >> initiator >> "\n";
    else
        initiator.clear();

    assertEOF(rb);

    hosts.resize(host_id_strings.size());
    std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
}
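
For illustration, a ZooKeeper payload produced by toString() and accepted by parse() looks roughly like this (host names hypothetical; the query text is escaped on write):

    version: 1
    query: CREATE TABLE test ON CLUSTER test_cluster (x UInt64) ENGINE = Memory
    hosts: ['host1:9000','host2:9000']
    initiator: host1:9000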
struct DDLTask
@ -315,7 +251,7 @@ DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context &
: context(context_)
, log(&Poco::Logger::get("DDLWorker"))
, pool_size(pool_size_)
, worker_pool(pool_size_)
, worker_pool(std::make_unique<ThreadPool>(pool_size))
{
CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, 0);
last_tasks.reserve(pool_size);
@ -352,7 +288,7 @@ DDLWorker::~DDLWorker()
stop_flag = true;
queue_updated_event->set();
cleanup_event->set();
worker_pool.wait();
worker_pool.reset();
main_thread.join();
cleanup_thread.join();
}
@ -517,7 +453,7 @@ void DDLWorker::scheduleTasks()
if (!already_processed)
{
worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]()
worker_pool->scheduleOrThrowOnError([this, task_ptr = task.release()]()
{
setThreadName("DDLWorkerExec");
enqueueTask(DDLTaskPtr(task_ptr));
@ -1138,6 +1074,17 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
void DDLWorker::runMainThread()
{
auto reset_state = [&](bool reset_pool = true)
{
/// It will wait for all threads in the pool to finish and will not rethrow exceptions (if any).
/// We create a new thread pool to forget previous exceptions.
if (reset_pool)
worker_pool = std::make_unique<ThreadPool>(pool_size);
/// Clear other in-memory state, as if the server had just started.
last_tasks.clear();
max_id = 0;
};
setThreadName("DDLWorker");
LOG_DEBUG(log, "Started DDLWorker thread");
@ -1153,7 +1100,12 @@ void DDLWorker::runMainThread()
catch (const Coordination::Exception & e)
{
if (!Coordination::isHardwareError(e.code))
throw; /// A logical error.
{
/// A logical error.
LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.", getCurrentExceptionMessage(true));
reset_state(false);
assert(false); /// Catch such failures in tests with debug build
}
tryLogCurrentException(__PRETTY_FUNCTION__);
@ -1162,8 +1114,8 @@ void DDLWorker::runMainThread()
}
catch (...)
{
tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
return;
tryLogCurrentException(log, "Cannot initialize DDL queue.");
reset_state(false);
}
}
while (!initialized && !stop_flag);
@ -1192,14 +1144,14 @@ void DDLWorker::runMainThread()
}
else
{
LOG_ERROR(log, "Unexpected ZooKeeper error: {}. Terminating.", getCurrentExceptionMessage(true));
return;
LOG_ERROR(log, "Unexpected ZooKeeper error: {}", getCurrentExceptionMessage(true));
reset_state();
}
}
catch (...)
{
tryLogCurrentException(log, "Unexpected error, will terminate:");
return;
tryLogCurrentException(log, "Unexpected error:");
reset_state();
}
}
}

View File

@ -1,12 +1,15 @@
#pragma once
#include <DataStreams/BlockIO.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <DataStreams/BlockIO.h>
#include <Storages/IStorage_fwd.h>
#include <Poco/Net/NetException.h>
#include <Common/CurrentThread.h>
#include <Common/DNSResolver.h>
#include <Common/ThreadPool.h>
#include <Common/isLocalAddress.h>
#include <common/logger_useful.h>
#include <Storages/IStorage.h>
#include <atomic>
#include <chrono>
@ -16,24 +19,80 @@
namespace zkutil
{
class ZooKeeper;
class ZooKeeper;
}
namespace DB
{
class Context;
class ASTAlterQuery;
class AccessRightsElements;
struct DDLLogEntry;
struct HostID
{
String host_name;
UInt16 port;
HostID() = default;
explicit HostID(const Cluster::Address & address) : host_name(address.host_name), port(address.port) { }
static HostID fromString(const String & host_port_str)
{
HostID res;
std::tie(res.host_name, res.port) = Cluster::Address::fromString(host_port_str);
return res;
}
String toString() const { return Cluster::Address::toString(host_name, port); }
String readableString() const { return host_name + ":" + DB::toString(port); }
bool isLocalAddress(UInt16 clickhouse_port) const
{
try
{
return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
}
catch (const Poco::Net::NetException &)
{
/// Avoid "Host not found" exceptions
return false;
}
}
static String applyToString(const HostID & host_id) { return host_id.toString(); }
};
struct DDLLogEntry
{
String query;
std::vector<HostID> hosts;
String initiator; // optional
static constexpr int CURRENT_VERSION = 1;
public:
String toString();
void parse(const String & data);
};
struct DDLTask;
using DDLTaskPtr = std::unique_ptr<DDLTask>;
/// Pushes distributed DDL query to the queue
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(
const ASTPtr & query_ptr,
const Context & context,
const AccessRightsElements & query_requires_access,
bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(
const ASTPtr & query_ptr,
const Context & context,
AccessRightsElements && query_requires_access,
bool query_requires_grant_option = false);
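
A hedged usage sketch: any DDL statement carrying an ON CLUSTER clause is routed through executeDDLQueryOnCluster and pushed to the queue that DDLWorker consumes (cluster and table names hypothetical):

    CREATE TABLE default.events ON CLUSTER my_cluster
    (
        d Date,
        x UInt64
    ) ENGINE = MergeTree ORDER BY d;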
class DDLWorker
@ -127,7 +186,7 @@ private:
/// Size of the pool for query execution.
size_t pool_size = 1;
ThreadPool worker_pool;
std::unique_ptr<ThreadPool> worker_pool;
/// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
Int64 cleanup_delay_period = 60; // minute (in seconds)

View File

@ -8,6 +8,7 @@
#include <Poco/File.h>
#include <Common/quoteString.h>
#include <Storages/StorageMemory.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/formatAST.h>
#include <IO/ReadHelpers.h>
@ -148,10 +149,16 @@ void DatabaseCatalog::loadDatabases()
std::lock_guard lock{tables_marked_dropped_mutex};
if (!tables_marked_dropped.empty())
(*drop_task)->schedule();
/// Another background thread which drops temporary LiveViews.
/// We should start it after loadMarkedAsDroppedTables() to avoid a race condition.
TemporaryLiveViewCleaner::instance().startupIfNecessary();
}
void DatabaseCatalog::shutdownImpl()
{
TemporaryLiveViewCleaner::shutdown();
if (drop_task)
(*drop_task)->deactivate();
@ -524,6 +531,7 @@ std::unique_ptr<DatabaseCatalog> DatabaseCatalog::database_catalog;
DatabaseCatalog::DatabaseCatalog(Context & global_context_)
: global_context(global_context_), log(&Poco::Logger::get("DatabaseCatalog"))
{
TemporaryLiveViewCleaner::init(global_context);
}
DatabaseCatalog & DatabaseCatalog::init(Context & global_context_)

View File

@ -62,7 +62,7 @@ public:
using Actions = std::vector<Action>;
/// This map helps to find input position bu it's name.
/// This map helps to find input position by its name.
/// Key is a view to input::result_name.
/// Result is a list because it is allowed for inputs to have same names.
using NameToInputMap = std::unordered_map<std::string_view, std::list<size_t>>;
@ -87,6 +87,7 @@ public:
const Actions & getActions() const { return actions; }
const std::list<Node> & getNodes() const { return actions_dag->getNodes(); }
const ActionsDAG & getActionsDAG() const { return *actions_dag; }
const ColumnNumbers & getResultPositions() const { return result_positions; }
/// Get a list of input columns.
Names getRequiredColumns() const;

View File

@ -1489,23 +1489,6 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
columns_to_remove.insert(step.required_output[i]);
}
if (!columns_to_remove.empty())
{
auto columns = prewhere_info->prewhere_actions->getResultColumns();
auto remove_actions = std::make_shared<ActionsDAG>();
for (const auto & column : columns)
{
if (columns_to_remove.count(column.name))
{
remove_actions->addInput(column);
remove_actions->removeColumn(column.name);
}
}
prewhere_info->remove_columns_actions = std::move(remove_actions);
}
columns_to_remove_after_prewhere = std::move(columns_to_remove);
}
else if (hasFilter())

View File

@ -53,6 +53,13 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}, context);
}
}
else if ((exists_query = query_ptr->as<ASTExistsViewQuery>()))
{
String database = context.resolveDatabase(exists_query->database);
context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table);
auto tbl = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context);
result = tbl != nullptr && tbl->isView();
}
else if ((exists_query = query_ptr->as<ASTExistsDatabaseQuery>()))
{
String database = context.resolveDatabase(exists_query->database);

View File

@ -156,6 +156,10 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, Context &
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsViewQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsDictionaryQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);

View File
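Taken together, the InterpreterExistsQuery and InterpreterFactory hunks above wire up a statement of roughly this shape (database and view names hypothetical):

    -- Returns 1 only if db.v exists and is a view, 0 otherwise.
    EXISTS VIEW db.v;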

@ -106,7 +106,7 @@ Block InterpreterInsertQuery::getSampleBlock(
/// The table does not have a column with that name
if (!table_sample.has(current_name))
throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(),
throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(),
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
if (!allow_materialized && !table_sample_non_materialized.has(current_name))

View File

@ -33,6 +33,7 @@
#include <Interpreters/JoinedTables.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPlan/AddingDelayedSourceStep.h>
@ -1223,6 +1224,7 @@ void InterpreterSelectQuery::executeFetchColumns(
temp_query_info.query = query_ptr;
temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
temp_query_info.sets = query_analyzer->getPreparedSets();
num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
}
if (num_rows)
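
The new totalRowsByPartitionPredicate() call above extends optimize_trivial_count to predicates on the partition key; a hedged sketch (partitioning scheme hypothetical):

    -- Assuming test_table is partitioned by toYYYYMM(timestamp), this count can be
    -- answered from part-level metadata without reading any data.
    SELECT count() FROM test_table WHERE toYYYYMM(timestamp) = 202101;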
@ -1329,9 +1331,12 @@ void InterpreterSelectQuery::executeFetchColumns(
if (is_alias)
{
auto column_decl = storage_columns.get(column);
/// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer).
auto cast_column_default = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName());
column_expr = setAlias(cast_column_default->clone(), column);
column_expr = column_default->expression->clone();
// Recursive visit to resolve alias-on-alias references.
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context);
column_expr = addTypeConversionToAST(std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), *context);
column_expr = setAlias(column_expr, column);
}
else
column_expr = std::make_shared<ASTIdentifier>(column);
@ -1543,7 +1548,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context);
}
StreamLocalLimits limits;

View File

@ -467,8 +467,11 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context)
guard.second = catalog.getDDLGuard(guard.first.database_name, guard.first.table_name);
ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size()));
for (auto & table : replica_names)
pool.scheduleOrThrowOnError([&]() { tryRestartReplica(table, system_context, false); });
for (auto & replica : replica_names)
{
LOG_TRACE(log, "Restarting replica on {}", replica.getNameForLogs());
pool.scheduleOrThrowOnError([&]() { tryRestartReplica(replica, system_context, false); });
}
pool.wait();
}
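
The per-replica trace line above fires once for each table restarted by the corresponding statements (names hypothetical):

    SYSTEM RESTART REPLICA db.table;  -- single table, handled by tryRestartReplica
    SYSTEM RESTART REPLICAS;          -- all replicated tables, scheduled on the pool above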

View File

@ -78,6 +78,9 @@ void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Da
auto & func_arguments = func.arguments->children;
if (func_arguments.size() != 1)
return;
const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
if (!first_arg_func || first_arg_func->arguments->children.empty())
return;
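
This early return guards the any() argument rewrite: it now fires only when the argument is itself a function call with arguments. Assuming this visitor implements the optimize_move_functions_out_of_any rewrite, the effect is roughly (table and column hypothetical):

    SELECT any(number + 1) FROM numbers(10); -- argument is a function call: may be rewritten to any(number) + 1
    SELECT any(x) FROM t;                    -- bare column: the new guard leaves it unchanged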

View File

@ -230,16 +230,8 @@ void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & columns) const
{
for (auto & col : columns)
{
/// Materialize column.
/// Column is not empty if it is constant, but after Join all constants will be materialized.
/// So, we need remove constants from header.
if (col.column)
col.column = nullptr;
if (leftBecomeNullable(col.type))
col.type = makeNullable(col.type);
}
for (const auto & col : columns_added_by_join)
{

View File

@ -18,6 +18,7 @@
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -427,6 +428,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
}
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
@ -730,6 +732,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
required_source_columns.swap(source_columns);
}
NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
NameSet forbidden_columns;
for (const auto & elem : array_join_result_to_source)
forbidden_columns.insert(elem.first);
return forbidden_columns;
}
TreeRewriterResultPtr TreeRewriter::analyzeSelect(
ASTPtr & query,
@ -793,6 +802,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// Rewrite filters for the select query; this must run after getArrayJoinedColumns.
if (settings.optimize_respect_aliases && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
}
result.aggregates = getAggregates(query, *select_query);
result.window_function_asts = getWindowFunctions(query, *select_query);
result.collectUsedColumns(query, true);

View File

@ -70,6 +70,7 @@ struct TreeRewriterResult
void collectSourceColumns(bool add_special);
void collectUsedColumns(const ASTPtr & query, bool is_select);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
NameSet getArrayJoinSourceNameSet() const;
const Scalars & getScalars() const { return scalars; }
};

View File

@ -4,11 +4,20 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTWithAlias.h>
#include <Storages/ColumnsDescription.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int THERE_IS_NO_DEFAULT_VALUE;
}
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
{
auto func = makeASTFunction("cast", ast, std::make_shared<ASTLiteral>(type_name));
@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
return func;
}
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context)
{
auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, all_columns);
const auto actions = ExpressionAnalyzer(ast, syntax_analyzer_result, context).getActions(true);
for (const auto & action : actions->getActions())
if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
auto block = actions->getSampleBlock();
auto desc_type = block.getByName(ast->getColumnName()).type;
if (desc_type->getName() != type_name)
return addTypeConversionToAST(std::move(ast), type_name);
return std::move(ast);
}
}
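
In effect, the new overload analyzes the expression's result type first and adds the CAST wrapper only on a mismatch; a hedged illustration reusing the hypothetical alias table from earlier:

    -- day1 Date ALIAS toDate(timestamp) + 1: the expression already infers Date,
    -- so the AST is returned unwrapped. An expression inferring DateTime behind a
    -- Date column would instead be rewritten to CAST(expr, 'Date') by the
    -- single-argument overload.
    SELECT day1 FROM test_table;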

View File

@ -6,8 +6,12 @@
namespace DB
{
class Context;
class NamesAndTypesList;
/// It will produce an expression with CAST to get an AST with the required type.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name);
/// If the type is already the same, the CAST wrapper is omitted.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context);
}

View File

@ -0,0 +1,16 @@
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Storages/ColumnsDescription.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context)
{
ColumnAliasesVisitor::Data alias_column_data(columns, forbidden_columns, context);
ColumnAliasesVisitor alias_column_visitor(alias_column_data);
alias_column_visitor.visit(ast);
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <common/types.h>
#include <Core/Names.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ColumnsDescription;
class Context;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context);
}

View File

@ -37,6 +37,7 @@ SRCS(
ClusterProxy/SelectStreamFactory.cpp
ClusterProxy/executeQuery.cpp
CollectJoinOnKeysVisitor.cpp
ColumnAliasesVisitor.cpp
Context.cpp
CrashLog.cpp
CrossToInnerJoinVisitor.cpp
@ -157,6 +158,7 @@ SRCS(
interpretSubquery.cpp
join_common.cpp
loadMetadata.cpp
replaceAliasColumnsInQuery.cpp
processColumnTransformers.cpp
sortBlock.cpp

View File

@ -261,11 +261,13 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserIdentifier id_parser;
ParserKeyword distinct("DISTINCT");
ParserKeyword all("ALL");
ParserExpressionList contents(false);
ParserSelectWithUnionQuery select;
ParserKeyword over("OVER");
bool has_distinct_modifier = false;
bool has_all = false;
bool has_distinct = false;
ASTPtr identifier;
ASTPtr query;
@ -279,10 +281,34 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
++pos;
auto pos_after_bracket = pos;
auto old_expected = expected;
if (all.ignore(pos, expected))
has_all = true;
if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
else
has_distinct = true;
if (!has_all && all.ignore(pos, expected))
has_all = true;
if (has_all && has_distinct)
return false;
if (has_all || has_distinct)
{
/// In the cases f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_all = false;
has_distinct = false;
}
}
if (!has_distinct && !has_all)
{
auto old_pos = pos;
auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
@ -370,14 +396,37 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
++pos;
/// Parametric aggregate functions cannot have DISTINCT in the parameters list.
if (has_distinct_modifier)
if (has_distinct)
return false;
expr_list_params = expr_list_args;
expr_list_args = nullptr;
pos_after_bracket = pos;
old_expected = expected;
if (all.ignore(pos, expected))
has_all = true;
if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
has_distinct = true;
if (!has_all && all.ignore(pos, expected))
has_all = true;
if (has_all && has_distinct)
return false;
if (has_all || has_distinct)
{
/// In the cases f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_distinct = false;
}
}
if (!contents.parse(pos, expr_list_args, expected))
return false;
@ -391,7 +440,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
tryGetIdentifierNameInto(identifier, function_node->name);
/// func(DISTINCT ...) is equivalent to funcDistinct(...)
if (has_distinct_modifier)
if (has_distinct)
function_node->name += "Distinct";
function_node->arguments = expr_list_args;

View File
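A hedged summary of what the function-level ALL/DISTINCT handling above accepts and rejects (f, t, and x hypothetical):

    SELECT sum(ALL x) FROM t;          -- ALL is a modifier: parsed the same as sum(x)
    SELECT count(DISTINCT x) FROM t;   -- rewritten to countDistinct(x)
    SELECT f(ALL) FROM t;              -- ')' follows: the parser rewinds and treats ALL as an identifier
    SELECT sum(ALL DISTINCT x) FROM t; -- both modifiers at once: the parser rejects this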

@ -30,6 +30,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
node = select_query;
ParserKeyword s_select("SELECT");
ParserKeyword s_all("ALL");
ParserKeyword s_distinct("DISTINCT");
ParserKeyword s_from("FROM");
ParserKeyword s_prewhere("PREWHERE");
@ -91,14 +92,24 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
}
/// SELECT [DISTINCT] [TOP N [WITH TIES]] expr list
/// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list
{
bool has_all = false;
if (!s_select.ignore(pos, expected))
return false;
if (s_all.ignore(pos, expected))
has_all = true;
if (s_distinct.ignore(pos, expected))
select_query->distinct = true;
if (!has_all && s_all.ignore(pos, expected))
has_all = true;
if (has_all && select_query->distinct)
return false;
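
The block above makes ALL and DISTINCT mutually exclusive at the SELECT level as well; sketched (t and x hypothetical):

    SELECT ALL x FROM t;          -- explicit ALL: same as SELECT x FROM t
    SELECT DISTINCT x FROM t;     -- sets select_query->distinct
    SELECT ALL DISTINCT x FROM t; -- rejected as a parse error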
if (s_top.ignore(pos, expected))
{
ParserNumber num;

Some files were not shown because too many files have changed in this diff