Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 07:31:57 +00:00)

Commit ea1f15f619: Merge remote-tracking branch 'origin' into add-sqlancer-test-docker

@@ -112,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)

/** Signal handler for HUP / USR1 */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{
+    DENY_ALLOCATIONS_IN_SCOPE;
    writeSignalIDtoSignalPipe(sig);
}

static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
{
+    DENY_ALLOCATIONS_IN_SCOPE;
    writeSignalIDtoSignalPipe(sig);
}

@@ -125,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
{
+    DENY_ALLOCATIONS_IN_SCOPE;
    auto saved_errno = errno;   /// We must restore previous value of errno in signal handler.

    char buf[signal_pipe_buf_size];

@@ -3,7 +3,7 @@ SET(VERSION_REVISION 54445)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
-SET(VERSION_DESCRIBE v21.1.1.5643-prestable)
-SET(VERSION_STRING 21.1.1.5643)
+SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
+SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
+SET(VERSION_STRING 21.1.1.5646)
# end of autochange

@@ -1,5 +1,4 @@
-# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope
-if (NOT ARCH_ARM AND NOT OS_FREEBSD AND OPENSSL_FOUND)
+if (NOT ARCH_ARM AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")

@@ -1,2 +1,2 @@
-wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
-tar xJf MacOSX10.14.sdk.tar.xz --strip-components=1
+wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
+tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1

contrib/libcxx (vendored submodule)
@@ -1 +1 @@
-Subproject commit 95650a0db4399ee871d5fd698ad12384fe9fa964
+Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7

@@ -5,6 +5,8 @@ set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
set(SRCS
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
${LIBCXX_SOURCE_DIR}/src/any.cpp
+${LIBCXX_SOURCE_DIR}/src/atomic.cpp
+${LIBCXX_SOURCE_DIR}/src/barrier.cpp
${LIBCXX_SOURCE_DIR}/src/bind.cpp
${LIBCXX_SOURCE_DIR}/src/charconv.cpp
${LIBCXX_SOURCE_DIR}/src/chrono.cpp
@@ -20,6 +22,7 @@ ${LIBCXX_SOURCE_DIR}/src/functional.cpp
${LIBCXX_SOURCE_DIR}/src/future.cpp
${LIBCXX_SOURCE_DIR}/src/hash.cpp
${LIBCXX_SOURCE_DIR}/src/ios.cpp
+${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp
${LIBCXX_SOURCE_DIR}/src/iostream.cpp
${LIBCXX_SOURCE_DIR}/src/locale.cpp
${LIBCXX_SOURCE_DIR}/src/memory.cpp
@@ -28,6 +31,7 @@ ${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
${LIBCXX_SOURCE_DIR}/src/new.cpp
${LIBCXX_SOURCE_DIR}/src/optional.cpp
${LIBCXX_SOURCE_DIR}/src/random.cpp
+${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp
${LIBCXX_SOURCE_DIR}/src/regex.cpp
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp

contrib/libcxxabi (vendored submodule)
@@ -1 +1 @@
-Subproject commit 1ebc83af4c06dbcd56b4d166c1314a7d4c1173f9
+Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076

@@ -11,7 +11,6 @@ ${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
-${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp

@@ -83,7 +83,8 @@
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
#define _TTHREAD_EMULATE_TIMESPEC_GET_
#endif

#elif defined(__FreeBSD__)
#define HAVE_PTHREAD_SETNAME_FREEBSD 1
#else
// pthread_setname_gnu
#define HAVE_PTHREAD_SETNAME_GNU 1

@@ -45,7 +45,8 @@
        "name": "yandex/clickhouse-stateless-test",
        "dependent": [
            "docker/test/stateful",
-            "docker/test/coverage"
+            "docker/test/coverage",
+            "docker/test/unit"
        ]
    },
    "docker/test/stateless_pytest": {
@@ -134,7 +135,9 @@
        "name": "yandex/clickhouse-test-base",
        "dependent": [
            "docker/test/stateless",
-            "docker/test/stateless_pytest"
+            "docker/test/stateless_unbundled",
+            "docker/test/stateless_pytest",
+            "docker/test/integration/base"
        ]
    },
    "docker/packager/unbundled": {

@@ -82,7 +82,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
    && rm -rf cctools-port

# Download toolchain for Darwin
-RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
+RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz

# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.

@@ -3,7 +3,7 @@
set -x -e

mkdir -p build/cmake/toolchain/darwin-x86_64
-tar xJf MacOSX10.14.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
+tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1

mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

@@ -4,5 +4,5 @@ alpine-root/install/*
# docs (looks useless)
alpine-root/usr/share/doc/*

-# packages, etc. (used by prepare.sh)
-alpine-root/tgz-packages/*
+# packages, etc. (used by alpine-build.sh)
+tgz-packages/*

docker/server/.gitignore (vendored)
@@ -1 +1,2 @@
alpine-root/*
+tgz-packages/*

@@ -16,7 +16,7 @@ RUN addgroup clickhouse \
    && chown root:clickhouse /var/log/clickhouse-server \
    && chmod 775 /var/log/clickhouse-server \
    && chmod +x /entrypoint.sh \
-    && apk add --no-cache su-exec
+    && apk add --no-cache su-exec bash

EXPOSE 9000 8123 9009

@@ -4,6 +4,7 @@ set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
+DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"

# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
@@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"

-# where to put downloaded tgz
-TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
-
-# clean up the root from old runs
+# clean up the root from old runs, it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"

+# where to put downloaded tgz
+TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
+mkdir -p "$TGZ_PACKAGES_FOLDER"

PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
@@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
-    wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
+    wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done

# unpack tars
@@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
    "${CONTAINER_ROOT_FOLDER}/lib64"

cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
-cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
+cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"

## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
@@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"

-docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
+docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
+rm -rf "$CONTAINER_ROOT_FOLDER"

@@ -1,152 +0,0 @@
#!/bin/sh
#set -x

DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
    DO_CHOWN=0
fi

CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"

# support --user
if [ "$(id -u)" = "0" ]; then
    USER=$CLICKHOUSE_UID
    GROUP=$CLICKHOUSE_GID
    # busybox has setuidgid & chpst buildin
    gosu="su-exec $USER:$GROUP"
else
    USER="$(id -u)"
    GROUP="$(id -g)"
    gosu=""
    DO_CHOWN=0
fi

# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"

# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"

# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"

CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"

for dir in "$DATA_DIR" \
  "$ERROR_LOG_DIR" \
  "$LOG_DIR" \
  "$TMP_DIR" \
  "$USER_PATH" \
  "$FORMAT_SCHEMA_PATH"
do
    # check if variable not empty
    [ -z "$dir" ] && continue
    # ensure directories exist
    if ! mkdir -p "$dir"; then
        echo "Couldn't create necessary directory: $dir"
        exit 1
    fi

    if [ "$DO_CHOWN" = "1" ]; then
        # ensure proper directories permissions
        chown -R "$USER:$GROUP" "$dir"
    elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
        echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
        exit 1
    fi
done

# if clickhouse user is defined - create it (user "default" already exists out of box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
    echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
    cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
    <yandex>
      <!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
      <users>
        <!-- Remove default user -->
        <default remove="remove">
        </default>

        <${CLICKHOUSE_USER}>
          <profile>default</profile>
          <networks>
            <ip>::/0</ip>
          </networks>
          <password>${CLICKHOUSE_PASSWORD}</password>
          <quota>default</quota>
        </${CLICKHOUSE_USER}>
      </users>
    </yandex>
EOT
fi

if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
    # Listen only on localhost until the initialization is done
    $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
    pid="$!"

    # check if clickhouse is ready to accept connections
    # will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
    tries=6
    while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
        if [ "$tries" -le "0" ]; then
            echo >&2 'ClickHouse init process failed.'
            exit 1
        fi
        tries=$(( tries-1 ))
        sleep 1
    done

    if [ -n "$CLICKHOUSE_PASSWORD" ]; then
        printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
    fi

    clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD "

    # create default database, if defined
    if [ -n "$CLICKHOUSE_DB" ]; then
        echo "$0: create database '$CLICKHOUSE_DB'"
        "$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
    fi

    for f in /docker-entrypoint-initdb.d/*; do
        case "$f" in
            *.sh)
                if [ -x "$f" ]; then
                    echo "$0: running $f"
                    "$f"
                else
                    echo "$0: sourcing $f"
                    . "$f"
                fi
                ;;
            *.sql)    echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
            *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
            *)        echo "$0: ignoring $f" ;;
        esac
        echo
    done

    if ! kill -s TERM "$pid" || ! wait "$pid"; then
        echo >&2 'Finishing of ClickHouse init process failed.'
        exit 1
    fi
fi

# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
    exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi

# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
exec "$@"

docker/server/entrypoint.sh (mode changed: normal file → executable file)
@@ -1,7 +1,10 @@
#!/bin/bash

+set -eo pipefail
+shopt -s nullglob
+
DO_CHOWN=1
-if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
+if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
    DO_CHOWN=0
fi

@@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"

# support --user
-if [ x"$UID" == x0 ]; then
+if [ "$(id -u)" = "0" ]; then
    USER=$CLICKHOUSE_UID
    GROUP=$CLICKHOUSE_GID
-    gosu="gosu $USER:$GROUP"
+    if command -v gosu &> /dev/null; then
+        gosu="gosu $USER:$GROUP"
+    elif command -v su-exec &> /dev/null; then
+        gosu="su-exec $USER:$GROUP"
+    else
+        echo "No gosu/su-exec detected!"
+        exit 1
+    fi
else
    USER="$(id -u)"
    GROUP="$(id -g)"
@@ -23,18 +33,23 @@ fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"

+if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
+    echo "Configuration file '$dir' isn't readable by user with id '$USER'"
+    exit 1
+fi
+
# port is needed to check if clickhouse-server is ready for connections
-HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
+HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"

# get CH directories locations
-DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
-TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
-USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
-LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
-LOG_DIR="$(dirname $LOG_PATH || true)"
-ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
-ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
-FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
+DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
+TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
+USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
+LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
+LOG_DIR="$(dirname "$LOG_PATH" || true)"
+ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
+ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
+FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"

CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@@ -58,8 +73,8 @@ do
    if [ "$DO_CHOWN" = "1" ]; then
        # ensure proper directories permissions
        chown -R "$USER:$GROUP" "$dir"
-    elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
-        echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
+    elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
+        echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
        exit 1
    fi
done
@@ -90,21 +105,22 @@ fi

if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
    # Listen only on localhost until the initialization is done
-    $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
+    $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
    pid="$!"

    # check if clickhouse is ready to accept connections
-    # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
-    if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
-        echo >&2 'ClickHouse init process failed.'
-        exit 1
-    fi
+    # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
+    tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
+    while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
+        if [ "$tries" -le "0" ]; then
+            echo >&2 'ClickHouse init process failed.'
+            exit 1
+        fi
+        tries=$(( tries-1 ))
+        sleep 1
+    done

-    if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
-        printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
-    fi
-
-    clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
+    clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )

    echo

@@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
                "$f"
            else
                echo "$0: sourcing $f"
+                # shellcheck source=/dev/null
                . "$f"
            fi
            ;;
-        *.sql)    echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
+        *.sql)    echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
        *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
        *)        echo "$0: ignoring $f" ;;
    esac
@@ -140,7 +157,7 @@ fi

# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
-    exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
+    exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi

# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image

@@ -329,6 +329,7 @@ function run_tests

    # nc - command not found
    01601_proxy_protocol
+    01622_defaults_for_url_engine
)

time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

@@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od

ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

@@ -66,3 +66,6 @@ function run_tests()
export -f run_tests

timeout "$MAX_RUN_TIME" bash -c run_tests ||:
+
+tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
+tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

@@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

@@ -7,3 +7,4 @@ RUN apt-get install gdb

CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
    gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt

@@ -42,9 +42,9 @@ Also, we need to download macOS X SDK into the working tree.

``` bash
cd ClickHouse
-wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
+wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
-tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
+tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```

## Build ClickHouse {#build-clickhouse}

docs/en/operations/system-tables/distributed_ddl_queue.md (new file)
@@ -0,0 +1,67 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}

Contains information about distributed DDL queries (`ON CLUSTER` queries) that were executed on a cluster.

Columns:

- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host port.
- `status` ([Enum](../../sql-reference/data-types/enum.md)) — Status of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time.
- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds.
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) — Exception code from ZooKeeper.

**Example**

``` sql
SELECT *
FROM system.distributed_ddl_queue
WHERE cluster = 'test_cluster'
LIMIT 2
FORMAT Vertical

Query id: f544e72a-6641-43f1-836b-24baa1c9632a

Row 1:
──────
entry:             query-0000000000
host_name:         clickhouse01
host_address:      172.23.0.11
port:              9000
status:            Finished
cluster:           test_cluster
query:             CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator:         clickhouse01:9000
query_start_time:  2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code:    ZOK

Row 2:
──────
entry:             query-0000000000
host_name:         clickhouse02
host_address:      172.23.0.12
port:              9000
status:            Finished
cluster:           test_cluster
query:             CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator:         clickhouse01:9000
query_start_time:  2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code:    ZOK

2 rows in set. Elapsed: 0.025 sec.
```

[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue.md) <!--hide-->

@@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):

- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
-- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
+- For a MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in the case of MySQL 8 you need to disable meta-information caching in MySQL with `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default.

-For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
+For other sources (ODBC, ClickHouse, etc.), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:

- The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).

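To make the `<invalidate_query>` mechanism above concrete, here is a minimal sketch for a ClickHouse source; the database, table, and marker column (`dicts`, `dictionary_source`, `update_time`) are hypothetical names used only for illustration:

```sql
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9000
    user 'default'
    password ''
    db 'dicts'
    table 'dictionary_source'
    -- hypothetical marker query: returns one row that changes only when the data changes
    invalidate_query 'SELECT max(update_time) FROM dicts.dictionary_source'
))
```

The dictionary is then reloaded only when the single-row result of the marker query differs from the result of the previous check.
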
@@ -583,7 +583,7 @@ Example of settings:
or

``` sql
-SOURCE(MONGO(
+SOURCE(MONGODB(
    host 'localhost'
    port 27017
    user ''

@@ -23,6 +23,7 @@ The following actions are supported:
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
+- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.

These actions are described in detail below.

@@ -145,6 +146,26 @@ The `ALTER` query is atomic. For MergeTree tables it is also lock-free.

The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.

## MODIFY COLUMN REMOVE {#modify-remove}

Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.

Syntax:

```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```

**Example**

```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```

## See Also

- [REMOVE TTL](ttl.md).

## Limitations {#alter-query-limitations}

The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.

@@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:

- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As an expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single-element partitioning key, the expression should be wrapped in the `tuple(...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. The partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

@@ -3,10 +3,83 @@ toc_priority: 44
toc_title: TTL
---

-### Manipulations with Table TTL {#manipulations-with-table-ttl}
+# Manipulations with Table TTL {#manipulations-with-table-ttl}
+
+## MODIFY TTL {#modify-ttl}

You can change the [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a query of the following form:

``` sql
-ALTER TABLE table-name MODIFY TTL ttl-expression
+ALTER TABLE table_name MODIFY TTL ttl_expression;
```

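For instance (a sketch reusing the `table_with_ttl` table created in the REMOVE TTL example below), the expiration period can be shortened from three months to one:

```sql
-- hypothetical follow-up to the CREATE TABLE example below
ALTER TABLE table_with_ttl MODIFY TTL event_time + INTERVAL 1 MONTH;
```
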
## REMOVE TTL {#remove-ttl}

The `TTL` property can be removed from a table with the following query:

```sql
ALTER TABLE table_name REMOVE TTL
```

**Example**

Consider a table with a table-level `TTL`:

```sql
CREATE TABLE table_with_ttl
(
    event_time DateTime,
    UserID UInt64,
    Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;

INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');

INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```

Run `OPTIMIZE` to force the `TTL` cleanup:

```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```

The second row was deleted from the table:

```text
┌──────────event_time─┬──UserID─┬───Comment─┐
│ 2020-12-11 12:44:57 │       1 │ username1 │
└─────────────────────┴─────────┴───────────┘
```

Now remove the table `TTL` with the following query:

```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```

Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:

```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```

The `TTL` is no longer there, so the second row is not deleted:

```text
┌──────────event_time─┬──UserID─┬───Comment─┐
│ 2020-12-11 12:44:57 │       1 │ username1 │
│ 2020-08-11 12:44:57 │       2 │ username2 │
└─────────────────────┴─────────┴───────────┘
```

### See Also

- More about the [TTL-expression](../../../sql-reference/statements/create/table.md#ttl-expression).
- Modify column [with TTL](../../../sql-reference/statements/alter/column.md#alter_modify-column).

@@ -13,9 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```

-You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
-
-Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
+You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with a column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).

For example, consider the table:

@@ -23,9 +21,8 @@ For example, consider the table:
SHOW CREATE insert_select_testtable;
```

-```
-┌─statement────────────────────────────────────────────────────────────────────────────────────────┐
-│ CREATE TABLE insert_select_testtable
+```text
+CREATE TABLE insert_select_testtable
(
    `a` Int8,
    `b` String,
@@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
)
ENGINE = MergeTree()
ORDER BY a
-SETTINGS index_granularity = 8192 │
-└──────────────────────────────────────────────────────────────────────────────────────────────────┘
+SETTINGS index_granularity = 8192
```

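As a brief illustration of the `EXCEPT` modifier mentioned above, the following sketch fills every column of `insert_select_testtable` except `b` (it assumes the only other columns are `a` and one more numeric column, as in the table above):

```sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) VALUES (2, 2);
-- column b receives its default value (an empty string)
```
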
docs/en/sql-reference/statements/select/all.md (new file)
@@ -0,0 +1,21 @@
---
toc_title: ALL
---

# ALL Clause {#select-all}

`SELECT ALL` is identical to `SELECT` without `DISTINCT`.

- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception is thrown.

`ALL` can also be specified inside an aggregate function with the same effect (noop), for instance:

```sql
SELECT sum(ALL number) FROM numbers(10);
```

which is equivalent to

```sql
SELECT sum(number) FROM numbers(10);
```

@@ -18,10 +18,6 @@ It is possible to obtain the same result by applying [GROUP BY](../../../sql-ref
- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish running.

-## Limitations {#limitations}
-
-`DISTINCT` is not supported if `SELECT` has at least one array column.
-
## Examples {#examples}

ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.

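A minimal sketch of that evaluation order, assuming a hypothetical table `t1` with columns `a` and `b`: duplicates of `a` are removed first, and only then are the surviving rows sorted by `b`.

```sql
SELECT DISTINCT a FROM t1 ORDER BY b ASC;
```
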
@@ -44,9 +44,9 @@ Also, we need to download the macOS X SDK into the working tree.

``` bash
cd ClickHouse
-wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
+wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
-tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
+tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```

# Build ClickHouse {#build-clickhouse}

@@ -44,9 +44,9 @@ Also, we need to download the macOS X SDK into the working tree.

``` bash
cd ClickHouse
-wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
+wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
-tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
+tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```

# Build ClickHouse {#build-clickhouse}

@@ -45,9 +45,9 @@ make install

``` bash
cd ClickHouse
-wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
+wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
-tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
+tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```

# Build ClickHouse {#build-clickhouse}

@@ -133,7 +133,7 @@ ClickHouse has strong typing, so there is no

## Aggregate Functions {#aggregate-functions}

-Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to get results from that state. They are managed via the `IAggregateFunction` interface. States can be rather simple (the state of `AggregateFunctionCount` is just one person `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and a `HyperLogLog` probabilistic data structure).
+Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to get results from that state. They are managed via the `IAggregateFunction` interface. States can be rather simple (the state of `AggregateFunctionCount` is just a single variable of type `UInt64`) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and a `HyperLogLog` probabilistic data structure).

States are allocated in an `Arena` (a memory pool) to handle multiple states while executing a high-cardinality `GROUP BY` query (i.e. with a large number of unique keys). States can have a non-trivial constructor and destructor: for example, complex aggregate states can allocate additional memory themselves, so creating and destroying states, and correctly passing their ownership and destruction order, deserve extra attention.

@@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
When updating dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md):

> - For a text file, the modification time is checked. If it differs from the previously remembered time, the dictionary is updated.
-> - For MyISAM tables, the modification time is checked with a `SHOW TABLE STATUS` query.
+> - For a MySQL source, the modification time is checked with a `SHOW TABLE STATUS` query (for MySQL 8, meta-information caching must be disabled in MySQL with `set global information_schema_stats_expiry=0`).
> - Dictionaries from other sources are updated every time by default.

-For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/actions are needed:
+For other sources (ODBC, ClickHouse, etc.), you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/actions are needed:

> - The dictionary table must have a field that is guaranteed to change whenever the source data is updated.
> - The source settings must specify a query that retrieves that changing field. The ClickHouse server interprets the query result as a single row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field of the [source](external-dicts-dict-sources.md) settings.

@@ -12,6 +12,7 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
- [CLEAR COLUMN](#alter_clear-column) — resets column values for the given partition;
- [COMMENT COLUMN](#alter_comment-column) — adds a comment to the column;
- [MODIFY COLUMN](#alter_modify-column) — changes the column's type, default expression and TTL.
+- [MODIFY COLUMN REMOVE](#modify-remove) — removes one of the column properties.

A detailed description of each action is given below.

@@ -135,6 +136,28 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)

The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries run in the same order. The query waits for the corresponding actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.

## MODIFY COLUMN REMOVE {#modify-remove}

Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.

Syntax:

```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```

**Example**

Removing the TTL property:

```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```

## See Also

- [REMOVE TTL](ttl.md).

## Limitations of the ALTER query {#ogranicheniia-zaprosa-alter}

The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and type `Array(T)`: a nested data structure is fully equivalent to multiple array columns whose names share the same prefix before the dot.

@@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
To specify the needed partition in `ALTER ... PARTITION` queries, you can use:

- The partition name. You can find it in the `partition` column of the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) system table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- An arbitrary expression over the source table's columns. Constants and constant expressions are also supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- A tuple of expressions or constants that matches (in types) the partitioning key tuple. For a single-element partitioning key, the expression should be wrapped in the `tuple(...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- A string partition identifier. The partition identifier is used to name the partition's parts on the file system and in ZooKeeper. In `ALTER` queries the partition identifier must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- For [ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries: to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) system table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

@@ -306,4 +306,4 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;

Examples of `ALTER ... PARTITION` queries are available in the tests: [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).

-[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->

@@ -5,10 +5,82 @@ toc_title: TTL

# Manipulations with Table TTL {#manipuliatsii-s-ttl-tablitsy}

## MODIFY TTL {#modify-ttl}

You can change the [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) with a query of the following form:

``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
```

-[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->
## REMOVE TTL {#remove-ttl}

A table-level TTL can be removed with a query of the following form:

```sql
ALTER TABLE table_name REMOVE TTL
```

**Example**

Let's create a table with a table-level `TTL` and fill it with data:

```sql
CREATE TABLE table_with_ttl
(
    event_time DateTime,
    UserID UInt64,
    Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;

INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');

INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```

Run `OPTIMIZE` to force the cleanup by `TTL`:

```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```

As a result, the second row has been deleted:

```text
┌──────────event_time─┬──UserID─┬───Comment─┐
│ 2020-12-11 12:44:57 │       1 │ username1 │
└─────────────────────┴─────────┴───────────┘
```

Remove the table-level `TTL`:

```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```

Re-insert the deleted row and force the cleanup by `TTL` again with `OPTIMIZE`:

```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```

The `TTL` is no longer there, so the data is not deleted:

```text
┌──────────event_time─┬──UserID─┬───Comment─┐
│ 2020-12-11 12:44:57 │       1 │ username1 │
│ 2020-08-11 12:44:57 │       2 │ username2 │
└─────────────────────┴─────────┴───────────┘
```

### See Also

- More about the [TTL property](../../../engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-ttl).

[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->

@@ -13,9 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```

-You can specify a list of columns to insert using the following syntax: `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)`.
-
-Instead of listing all the required columns, you can use the `(* EXCEPT(column_list))` syntax.
+You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with an [asterisk](../../sql-reference/statements/select/index.md#asterisk) and/or modifiers such as `APPLY`, `EXCEPT`, `REPLACE`.

As an example, consider the table:

@@ -18,10 +18,6 @@ toc_title: DISTINCT
- When the [ORDER BY](order-by.md) clause is omitted and the [LIMIT](limit.md) clause is present, the query stops running right after the required number of distinct rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish.

-## Limitations {#limitations}
-
-`DISTINCT` is not supported if `SELECT` has at least one array column.
-
## Examples {#examples}

ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.

@@ -33,8 +33,8 @@ cd cctools-port/cctools
make install

cd ${CCTOOLS}
-wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
-tar xJf MacOSX10.14.sdk.tar.xz
+wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
+tar xJf MacOSX10.15.sdk.tar.xz
```

# Build ClickHouse {#bian-yi-clickhouse}
@@ -46,7 +46,7 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
    -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
    -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
    -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \
-    -DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk
+    -DSDK_PATH=${CCTOOLS}/MacOSX10.15.sdk
ninja -C build-osx
```

@@ -29,7 +29,7 @@ SELECT 1 - 0.9

- When reading floating-point numbers line by line, the result may not be the nearest machine-representable number.

-## South and Inf {#data_type-float-nan-inf}
+## NaN and Inf {#data_type-float-nan-inf}

Compared with standard SQL, ClickHouse supports the following categories of floating-point numbers:

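A short sketch of the special values this section describes (`/` is floating-point division here):

```sql
SELECT 0.5 / 0;   -- inf
SELECT -0.5 / 0;  -- -inf
SELECT 0 / 0;     -- nan
```
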
@@ -287,7 +287,7 @@
</div>
<div id="run_div">
    <button class="shadow" id="run">Run</button>
-    <span class="hint"> (Ctrl+Enter)</span>
+    <span class="hint"> (Ctrl/Cmd+Enter)</span>
    <span id="hourglass">⧗</span>
    <span id="check-mark">✔</span>
    <span id="stats"></span>

@@ -424,10 +424,10 @@
    post();
}

-document.onkeypress = function(event)
+document.onkeydown = function(event)
{
    /// Firefox has code 13 for Enter and Chromium has code 10.
-    if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
+    if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) {
        post();
    }
}

@@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
{
    using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
    static constexpr bool limit_num_elems = Trait::has_limit;
-    DataTypePtr & data_type;
    UInt64 max_elems;
    UInt64 seed;

@@ -121,7 +120,6 @@ public:
        const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
        : IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
            {data_type_}, {})
-        , data_type(this->argument_types[0])
        , max_elems(max_elems_)
        , seed(seed_)
    {
@@ -129,7 +127,7 @@ public:

    String getName() const override { return getNameByTrait<Trait>(); }

-    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
+    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }

    void insert(Data & a, const T & v, Arena * arena) const
    {

@ -168,7 +168,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
buffer.push_back(x.getValue());
}

@ -264,7 +264,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
++ret;
}
}

@ -419,7 +419,7 @@ public:
if (isSmall())
return small.find(x) != small.end();
else
return rb->contains(x);
return rb->contains(static_cast<Value>(x));
}

/**

@ -613,7 +613,7 @@ public:
/**
* Replace value
*/
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{
if (isSmall())
toLarge();

@ -622,9 +622,9 @@ public:
{
if (from_vals[i] == to_vals[i])
continue;
bool changed = rb->removeChecked(from_vals[i]);
bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
if (changed)
rb->add(to_vals[i]);
rb->add(static_cast<Value>(to_vals[i]));
}
}
};
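The widened rb_replace keeps the same contract as before: a value is re-added under its new key only if removeChecked() confirmed it was actually present. A minimal standalone sketch of that contract over std::set (the replaceValues helper is illustrative, not ClickHouse code):

```
#include <cstdint>
#include <iostream>
#include <set>

// rb_replace semantics on an ordinary ordered set: replace from -> to only if
// `from` was present (mirrors removeChecked() followed by add()).
static void replaceValues(std::set<uint64_t> & s,
                          const uint64_t * from_vals, const uint64_t * to_vals, size_t num)
{
    for (size_t i = 0; i < num; ++i)
    {
        if (from_vals[i] == to_vals[i])
            continue;
        if (s.erase(from_vals[i]) > 0) // "removeChecked": true if the value existed
            s.insert(to_vals[i]);
    }
}

int main()
{
    std::set<uint64_t> s{1, 2, 3};
    const uint64_t from[] = {2, 10};
    const uint64_t to[] = {20, 30};
    replaceValues(s, from, to, 2);
    for (uint64_t x : s)
        std::cout << x << ' '; // prints: 1 3 20
}
```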
@ -56,7 +56,7 @@ public:

DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
return std::make_shared<DataTypeArray>(this->argument_types[0]);
}

void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
{
}

MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
{
if (size == 0)
return cloneEmpty();

size_t from_size = data.size();

if (size <= from_size)
{
auto res = createView();
auto & res_data = res->data;
res_data.assign(data.begin(), data.begin() + size);
return res;
}
else
{
/// Create a new column to return.
MutableColumnPtr cloned_col = cloneEmpty();
auto * res = typeid_cast<ColumnAggregateFunction *>(cloned_col.get());

res->insertRangeFrom(*this, 0, from_size);
for (size_t i = from_size; i < size; ++i)
res->insertDefault();

return cloned_col;
}
}

}
@ -215,7 +215,7 @@ public:
void getExtremes(Field & min, Field & max) const override;

bool structureEquals(const IColumn &) const override;

MutableColumnPtr cloneResized(size_t size) const override;
};

}
@ -12,6 +12,10 @@
#include <random>
#include <cstdlib>

#ifdef MEMORY_TRACKER_DEBUG_CHECKS
thread_local bool _memory_tracker_always_throw_logical_error_on_allocation = false;
#endif

namespace
{

@ -165,6 +169,14 @@ void MemoryTracker::alloc(Int64 size)
}
}

#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(_memory_tracker_always_throw_logical_error_on_allocation))
{
_memory_tracker_always_throw_logical_error_on_allocation = false;
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
}
#endif

std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
{
@ -5,6 +5,28 @@
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>

#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
#endif

/// DENY_ALLOCATIONS_IN_SCOPE macro makes MemoryTracker throw LOGICAL_ERROR on any allocation attempt
/// until the end of the scope. It's useful to ensure that no allocations happen in signal handlers and
/// outside of try/catch block of thread functions. ALLOW_ALLOCATIONS_IN_SCOPE cancels effect of
/// DENY_ALLOCATIONS_IN_SCOPE in the inner scope. In Release builds these macros do nothing.
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
#include <ext/scope_guard.h>
extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation;
#define ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) \
bool _allocations_flag_prev_val##n = _memory_tracker_always_throw_logical_error_on_allocation; \
_memory_tracker_always_throw_logical_error_on_allocation = val; \
SCOPE_EXIT({ _memory_tracker_always_throw_logical_error_on_allocation = _allocations_flag_prev_val##n; })
#define ALLOCATIONS_IN_SCOPE_IMPL(n, val) ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val)
#define DENY_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, true)
#define ALLOW_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, false)
#else
#define DENY_ALLOCATIONS_IN_SCOPE static_assert(true)
#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true)
#endif

/** Tracks memory consumption.
 * It throws an exception if amount of consumed memory become greater than certain limit.
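The macro pair reduces to a thread-local flag that is saved, set, and restored at the end of the scope, which is what makes the DENY/ALLOW pair nest correctly. A minimal standalone sketch of the same idea without ext/scope_guard.h (names here are illustrative, not the real macros):

```
#include <cstdio>

thread_local bool deny_allocations = false;

// RAII guard: save the previous value of the flag, set a new one, restore on exit.
struct AllocationsFlagGuard
{
    bool prev;
    explicit AllocationsFlagGuard(bool value) : prev(deny_allocations) { deny_allocations = value; }
    ~AllocationsFlagGuard() { deny_allocations = prev; }
};

void allocationSite()
{
    if (deny_allocations)
        std::puts("would throw LOGICAL_ERROR: allocations not allowed");
    else
        std::puts("allocation permitted");
}

int main()
{
    AllocationsFlagGuard deny(true);       // like DENY_ALLOCATIONS_IN_SCOPE
    allocationSite();
    {
        AllocationsFlagGuard allow(false); // like ALLOW_ALLOCATIONS_IN_SCOPE in an inner scope
        allocationSite();
    }
    allocationSite();                      // outer deny is restored
}
```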
@ -181,6 +181,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period

void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::Real, sig, info, context);
}

@ -190,6 +191,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)

void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::CPU, sig, info, context);
}

@ -197,6 +197,7 @@ static void injection(

void ThreadFuzzer::signalHandler(int)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno;

auto & fuzzer = ThreadFuzzer::instance();
@ -208,6 +208,7 @@ size_t ThreadPoolImpl<Thread>::active() const
template <typename Thread>
void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
{
DENY_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);

@ -223,7 +224,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_

if (!jobs.empty())
{
job = std::move(jobs.top().job);
/// std::priority_queue does not provide interface for getting non-const reference to an element
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
jobs.pop();
}
else

@ -237,6 +240,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
{
try
{
ALLOW_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_active_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);
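The const_cast above is the usual way to move an element out of std::priority_queue, whose top() returns only a const reference. A self-contained sketch of the trick; it is safe here only because the element is popped immediately and the comparator reads a field that the move does not disturb:

```
#include <iostream>
#include <queue>
#include <string>
#include <utility>

struct Job
{
    int priority;
    std::string payload; // stands in for an expensive-to-copy member

    bool operator<(const Job & rhs) const { return priority < rhs.priority; }
};

int main()
{
    std::priority_queue<Job> jobs;
    jobs.push({1, "low"});
    jobs.push({7, "high"});

    // top() is const; const_cast lets us move the payload out instead of copying.
    // The moved-from string stays valid, and the comparator only reads `priority`,
    // so the immediate pop() below is well-defined.
    std::string payload = std::move(const_cast<Job &>(jobs.top()).payload);
    jobs.pop();

    std::cout << payload << '\n'; // prints: high
}
```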
@ -65,6 +65,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size());

bool dns_error = false;
for (auto & host_string : hosts_strings)
{
try

@ -76,14 +77,27 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho

nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
{
/// Most likely it's misconfiguration and wrong hostname was specified
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
/// Most likely DNS is not available now
dns_error = true;
LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText());
}
}

if (nodes.empty())
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
{
/// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error
if (dns_error)
throw KeeperException("Cannot resolve any of provided ZooKeeper hosts due to DNS error", Coordination::Error::ZCONNECTIONLOSS);
else
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
}

impl = std::make_unique<Coordination::ZooKeeper>(
nodes,
@ -38,7 +38,15 @@ UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size)

UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
size_t compressed_size = ZSTD_compress(dest, ZSTD_compressBound(source_size), source, source_size, level);
ZSTD_CCtx * cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
if (enable_long_range)
{
ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log); // NB zero window_log means "use default" for libzstd
}
size_t compressed_size = ZSTD_compress2(cctx, dest, ZSTD_compressBound(source_size), source, source_size);
ZSTD_freeCCtx(cctx);

if (ZSTD_isError(compressed_size))
throw Exception("Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)), ErrorCodes::CANNOT_COMPRESS);

@ -55,8 +63,13 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s
throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS);
}

CompressionCodecZSTD::CompressionCodecZSTD(int level_)
: level(level_)
CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_)
{
setCodecDescription(
"ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level)), std::make_shared<ASTLiteral>(static_cast<UInt64>(window_log))});
}

CompressionCodecZSTD::CompressionCodecZSTD(int level_) : level(level_), enable_long_range(false), window_log(0)
{
setCodecDescription("ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}

@ -64,13 +77,14 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_)
void registerCodecZSTD(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::ZSTD);
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr
{
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr {
int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL;
if (arguments && !arguments->children.empty())
{
if (arguments->children.size() > 1)
throw Exception("ZSTD codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
if (arguments->children.size() > 2)
throw Exception(
"ZSTD codec must have 1 or 2 parameters, given " + std::to_string(arguments->children.size()),
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);

const auto children = arguments->children;
const auto * literal = children[0]->as<ASTLiteral>();

@ -79,9 +93,32 @@ void registerCodecZSTD(CompressionCodecFactory & factory)

level = literal->value.safeGet<UInt64>();
if (level > ZSTD_maxCLevel())
throw Exception("ZSTD codec can't have level more that " + toString(ZSTD_maxCLevel()) + ", given " + toString(level), ErrorCodes::ILLEGAL_CODEC_PARAMETER);
}
throw Exception(
"ZSTD codec can't have level more than " + toString(ZSTD_maxCLevel()) + ", given " + toString(level),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
if (arguments->children.size() > 1)
{
const auto * window_literal = children[1]->as<ASTLiteral>();
if (!window_literal)
throw Exception("ZSTD codec second argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);

const int window_log = window_literal->value.safeGet<UInt64>();

ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
if (ZSTD_isError(window_log_bounds.error))
throw Exception(
"ZSTD windowLog parameter is not supported " + std::string(ZSTD_getErrorName(window_log_bounds.error)),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
// 0 means "use default" for libzstd
if (window_log != 0 && (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound))
throw Exception(
"ZSTD codec can't have window log more than " + toString(window_log_bounds.upperBound) + " and lower than "
+ toString(window_log_bounds.lowerBound) + ", given " + toString(window_log),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);

return std::make_shared<CompressionCodecZSTD>(level, window_log);
}
}
return std::make_shared<CompressionCodecZSTD>(level);
});
}
@ -12,9 +12,12 @@ class CompressionCodecZSTD : public ICompressionCodec
{
public:
static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;

CompressionCodecZSTD(int level_);

CompressionCodecZSTD(int level_, int window_log);

uint8_t getMethodByte() const override;

UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

@ -32,6 +35,8 @@ protected:

private:
const int level;
const bool enable_long_range;
const int window_log;
};

}
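For reference, the new compression path maps directly onto plain libzstd (1.4 or newer). This standalone sketch mirrors doCompressData() with an assumed level of 3 and a window log of 24; only documented libzstd calls are used:

```
#include <cstdio>
#include <string>
#include <vector>
#include <zstd.h> // requires libzstd >= 1.4 for ZSTD_compress2 / ZSTD_c_* parameters

int main()
{
    std::string src(1 << 20, 'a'); // highly compressible sample input
    std::vector<char> dst(ZSTD_compressBound(src.size()));

    ZSTD_CCtx * cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    // The two knobs behind ZSTD(level, window_log): long-distance matching
    // plus an explicit window log (0 would mean "use default" for libzstd).
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 24);

    size_t n = ZSTD_compress2(cctx, dst.data(), dst.size(), src.data(), src.size());
    ZSTD_freeCCtx(cctx);

    if (ZSTD_isError(n))
        std::printf("error: %s\n", ZSTD_getErrorName(n));
    else
        std::printf("compressed %zu -> %zu bytes\n", src.size(), n);
}
```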
@ -371,8 +371,9 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
@ -61,9 +61,12 @@ Block ColumnGathererStream::readImpl()

MutableColumnPtr output_column = column.column->cloneEmpty();
output_block = Block{column.cloneEmpty()};
/// Surprisingly this call may directly change output_block, bypassing
/// output_column. See ColumnGathererStream::gather.
output_column->gather(*this);
if (!output_column->empty())
output_block.getByPosition(0).column = std::move(output_column);

return output_block;
}
@ -6,6 +6,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>

@ -14,6 +15,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>

// TODO include this last because of a broken roaring header. See the comment
// inside.
#include <AggregateFunctions/AggregateFunctionGroupBitmapData.h>

@ -282,18 +284,16 @@ public:
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

const auto * arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type2))
throw Exception(
"Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

for (size_t i = 1; i < 3; ++i)
{
WhichDataType which(arguments[i].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
{
throw Exception(
"The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but one of them has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
return arguments[0];
}

@ -327,13 +327,23 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1, * container2;
const PaddedPODArray<UInt64> * container1, * container2;

ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
column_ptrs[i] = arguments[i].column.get();
if (i > 0)
{
column_holder[i - 1] = castColumn(arguments[i], std::make_shared<DataTypeUInt64>());
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}

if (is_column_const[0])
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());
else

@ -341,13 +351,13 @@ private:

container0 = &col_agg_func->getData();
if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();
if (is_column_const[2])
container2 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
container2 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get())->getData();
else
container2 = &typeid_cast<const ColumnUInt32*>(column_ptrs[2])->getData();
container2 = &typeid_cast<const ColumnUInt64*>(column_ptrs[2])->getData();

auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);

@ -357,8 +367,8 @@ private:
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T>*>(data_ptr_0);
const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];
const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i];

col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bitmap_data_2

@ -374,7 +384,7 @@ struct BitmapSubsetInRangeImpl
public:
static constexpr auto name = "bitmapSubsetInRange";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
}

@ -385,7 +395,7 @@ struct BitmapSubsetLimitImpl
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
}

@ -418,14 +428,14 @@ public:
for (size_t i = 0; i < 2; ++i)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[i + 1].get());
String msg(i == 0 ? "Second" : "Third");
msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + ".";
String msg = "The second and third arguments for function " + getName() + " must be one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but one of them has type " + arguments[i + 1]->getName() + ".";

if (!array_type)
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

auto nested_type = array_type->getNestedType();
WhichDataType which(nested_type);
if (!which.isUInt32())
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
return arguments[0];

@ -461,13 +471,26 @@ private:
bool is_column_const[3];
const ColumnAggregateFunction * col_agg_func;
const PaddedPODArray<AggregateDataPtr> * container0;
const ColumnArray * array;

const ColumnArray * array1;
const ColumnArray * array2;

ColumnPtr column_holder[2];
for (size_t i = 0; i < 3; ++i)
{
column_ptrs[i] = arguments[i].column.get();
if (i > 0)
{
auto array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
column_holder[i - 1] = castColumn(arguments[i], array_type);
column_ptrs[i] = column_holder[i-1].get();
}
else
{
column_ptrs[i] = arguments[i].column.get();
}
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}

if (is_column_const[0])
{
col_agg_func = typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get());

@ -479,21 +502,20 @@ private:
container0 = &col_agg_func->getData();

if (is_column_const[1])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
array1 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get());
else
{
array = typeid_cast<const ColumnArray *>(arguments[1].column.get());
}
const ColumnArray::Offsets & from_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & from_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
array1 = typeid_cast<const ColumnArray *>(column_ptrs[1]);

const ColumnArray::Offsets & from_offsets = array1->getOffsets();
const ColumnVector<UInt64>::Container & from_container = typeid_cast<const ColumnVector<UInt64> *>(&array1->getData())->getData();

if (is_column_const[2])
array = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
array2 = typeid_cast<const ColumnArray*>(typeid_cast<const ColumnConst*>(column_ptrs[2])->getDataColumnPtr().get());
else
array = typeid_cast<const ColumnArray *>(arguments[2].column.get());
array2 = typeid_cast<const ColumnArray *>(column_ptrs[2]);

const ColumnArray::Offsets & to_offsets = array->getOffsets();
const ColumnVector<UInt32>::Container & to_container = typeid_cast<const ColumnVector<UInt32> *>(&array->getData())->getData();
const ColumnArray::Offsets & to_offsets = array2->getOffsets();
const ColumnVector<UInt64>::Container & to_container = typeid_cast<const ColumnVector<UInt64> *>(&array2->getData())->getData();
auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction());
col_to->reserve(input_rows_count);

@ -526,6 +548,7 @@ private:
to_start = i == 0 ? 0 : to_offsets[i - 1];
to_end = to_offsets[i];
}

if (from_end - from_start != to_end - to_start)
throw Exception("From array size and to array size mismatch", ErrorCodes::LOGICAL_ERROR);

@ -724,10 +747,11 @@ public:
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
if (!(arg_type1))

WhichDataType which(arguments[1].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
throw Exception(
"Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

return std::make_shared<DataTypeNumber<UInt8>>();

@ -765,27 +789,32 @@ private:
{
const IColumn * column_ptrs[2];
bool is_column_const[2];
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt32> * container1;

for (size_t i = 0; i < 2; ++i)
{
column_ptrs[i] = arguments[i].column.get();
is_column_const[i] = isColumnConst(*column_ptrs[i]);
}
const PaddedPODArray<AggregateDataPtr> * container0;
const PaddedPODArray<UInt64> * container1;

column_ptrs[0] = arguments[0].column.get();
is_column_const[0] = isColumnConst(*column_ptrs[0]);

if (is_column_const[0])
container0 = &typeid_cast<const ColumnAggregateFunction*>(typeid_cast<const ColumnConst*>(column_ptrs[0])->getDataColumnPtr().get())->getData();
else
container0 = &typeid_cast<const ColumnAggregateFunction*>(column_ptrs[0])->getData();

// we can always cast the second column to ColumnUInt64
auto uint64_column = castColumn(arguments[1], std::make_shared<DataTypeUInt64>());
column_ptrs[1] = uint64_column.get();
is_column_const[1] = isColumnConst(*column_ptrs[1]);

if (is_column_const[1])
container1 = &typeid_cast<const ColumnUInt32*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
container1 = &typeid_cast<const ColumnUInt64*>(typeid_cast<const ColumnConst*>(column_ptrs[1])->getDataColumnPtr().get())->getData();
else
container1 = &typeid_cast<const ColumnUInt32*>(column_ptrs[1])->getData();
container1 = &typeid_cast<const ColumnUInt64*>(column_ptrs[1])->getData();

for (size_t i = 0; i < input_rows_count; ++i)
{
const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i];
const UInt32 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const UInt64 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i];
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0
= *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(data_ptr_0);
vec_to[i] = bitmap_data_0.rbs.rb_contains(data1);
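The common thread in these bitmap hunks is normalizing any of UInt8..UInt64 to UInt64 once, up front (via castColumn), so that only a single UInt64 code path remains afterwards. A standalone sketch of that normalize-then-one-path idea (AnyUIntColumn and toUInt64 are illustrative names, not ClickHouse types):

```
#include <cstdint>
#include <iostream>
#include <variant>
#include <vector>

// Hypothetical stand-in for a column that may hold any supported unsigned width.
using AnyUIntColumn = std::variant<std::vector<uint8_t>, std::vector<uint16_t>,
                                   std::vector<uint32_t>, std::vector<uint64_t>>;

// Widen every supported width to UInt64 once; everything downstream then
// needs only one code path, which is what the castColumn calls achieve.
static std::vector<uint64_t> toUInt64(const AnyUIntColumn & col)
{
    return std::visit([](const auto & v) {
        return std::vector<uint64_t>(v.begin(), v.end());
    }, col);
}

int main()
{
    AnyUIntColumn col = std::vector<uint16_t>{1, 300, 65535};
    for (uint64_t x : toUInt64(col))
        std::cout << x << '\n';
}
```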
@ -2263,7 +2263,7 @@ private:

template <typename ToDataType>
std::enable_if_t<IsDataTypeDecimal<ToDataType>, WrapperType>
createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type) const
createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const
{
TypeIndex type_index = from_type->getTypeId();
UInt32 scale = to_type->getScale();

@ -2282,11 +2282,12 @@ private:

auto wrapper_cast_type = cast_type;

return [wrapper_cast_type, type_index, scale, to_type]
return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
{
ColumnPtr result_column;
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool {
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
using RightDataType = typename Types::RightType;

@ -2312,6 +2313,19 @@ private:
return true;
}
}
else if constexpr (std::is_same_v<LeftDataType, DataTypeString>)
{
if (requested_result_is_nullable)
{
/// Consistent with CAST(Nullable(String) AS Nullable(Numbers))
/// In case when converting to Nullable type, we apply different parsing rule,
/// that will not throw an exception but return NULL in case of malformed input.
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast, ConvertReturnNullOnErrorTag>::execute(
arguments, result_type, input_rows_count, scale);

return true;
}
}

result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(arguments, result_type, input_rows_count, scale);

@ -2929,7 +2943,7 @@ private:
std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>> ||
std::is_same_v<ToDataType, DataTypeDateTime64>)
{
ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()));
ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
return true;
}
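The ConvertReturnNullOnErrorTag branch means a malformed string produces NULL instead of an exception when the requested result type is Nullable. A rough standalone analogue of that parse-or-NULL rule (a sketch, not the actual ConvertImpl machinery):

```
#include <charconv>
#include <iostream>
#include <optional>
#include <string>
#include <string_view>

// The whole string must parse; otherwise the result is empty (the row would
// become NULL), which is the behaviour the nullable-CAST branch selects.
static std::optional<int64_t> parseOrNull(std::string_view s)
{
    int64_t value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec != std::errc() || ptr != s.data() + s.size())
        return std::nullopt;
    return value;
}

int main()
{
    for (std::string_view s : {"42", "abc", "15x"})
    {
        auto v = parseOrNull(s);
        std::cout << s << " -> " << (v ? std::to_string(*v) : "NULL") << '\n';
    }
}
```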
src/Functions/decodeXMLComponent.cpp (new file, 238 lines)
@ -0,0 +1,238 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <common/find_symbols.h>

namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

namespace
{
struct DecodeXMLComponentName
{
static constexpr auto name = "decodeXMLComponent";
};

class FunctionDecodeXMLComponentImpl
{
public:
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
/// The size of the result is never more than the size of the source,
/// because entities decode to shorter byte sequences.
/// Example: &#xx... will decode to a UTF-8 byte sequence not longer than 4 bytes.
res_data.resize(data.size());

size_t size = offsets.size();
res_offsets.resize(size);

size_t prev_offset = 0;
size_t res_offset = 0;

for (size_t i = 0; i < size; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
size_t dst_size = execute(src_data, src_size, reinterpret_cast<char *>(res_data.data() + res_offset));

res_offset += dst_size;
res_offsets[i] = res_offset;
prev_offset = offsets[i];
}

res_data.resize(res_offset);
}

[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception("Function decodeXMLComponent cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}

private:
static const int max_legal_unicode_value = 0x10FFFF;
static const int max_decimal_length_of_unicode_point = 7; /// 1114111

static size_t execute(const char * src, size_t src_size, char * dst)
{
const char * src_pos = src;
const char * src_end = src + src_size;
char * dst_pos = dst;

while (true)
{
const char * entity_pos = find_first_symbols<'&'>(src_pos, src_end);

if (entity_pos + strlen("lt;") >= src_end)
break;

/// Copy text between entities.
size_t bytes_to_copy = entity_pos - src_pos;
memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy);
dst_pos += bytes_to_copy;
src_pos = entity_pos;

++entity_pos;

const char * entity_end = find_first_symbols<';'>(entity_pos, src_end);
if (entity_end == src_end)
break;

bool parsed = false;

/// &#NNNN; or &#xNNNN;
uint32_t code_point = 0;
if (isValidNumericEntity(entity_pos, entity_end, code_point))
{
codePointToUTF8(code_point, dst_pos);
parsed = true;
}
else if (entity_end - entity_pos == 2)
{
if (memcmp(entity_pos, "lt", 2) == 0)
{
*dst_pos = '<';
++dst_pos;
parsed = true;
}
else if (memcmp(entity_pos, "gt", 2) == 0)
{
*dst_pos = '>';
++dst_pos;
parsed = true;
}
}
else if (entity_end - entity_pos == 3)
{
if (memcmp(entity_pos, "amp", 3) == 0)
{
*dst_pos = '&';
++dst_pos;
parsed = true;
}
}
else if (entity_end - entity_pos == 4)
{
if (memcmp(entity_pos, "quot", 4) == 0)
{
*dst_pos = '"';
++dst_pos;
parsed = true;
}
else if (memcmp(entity_pos, "apos", 4) == 0)
{
*dst_pos = '\'';
++dst_pos;
parsed = true;
}
}

if (parsed)
{
/// Skip the parsed entity.
src_pos = entity_end + 1;
}
else
{
/// Copy one byte as is and skip it.
*dst_pos = *src_pos;
++dst_pos;
++src_pos;
}
}

/// Copy the rest of the string.
if (src_pos < src_end)
{
size_t bytes_to_copy = src_end - src_pos;
memcpySmallAllowReadWriteOverflow15(dst_pos, src_pos, bytes_to_copy);
dst_pos += bytes_to_copy;
}

return dst_pos - dst;
}

static void codePointToUTF8(uint32_t code_point, char *& dst_pos)
{
if (code_point < (1 << 7))
{
dst_pos[0] = (code_point & 0x7F);
++dst_pos;
}
else if (code_point < (1 << 11))
{
dst_pos[0] = ((code_point >> 6) & 0x1F) + 0xC0;
dst_pos[1] = (code_point & 0x3F) + 0x80;
dst_pos += 2;
}
else if (code_point < (1 << 16))
{
dst_pos[0] = ((code_point >> 12) & 0x0F) + 0xE0;
dst_pos[1] = ((code_point >> 6) & 0x3F) + 0x80;
dst_pos[2] = (code_point & 0x3F) + 0x80;
dst_pos += 3;
}
else
{
dst_pos[0] = ((code_point >> 18) & 0x07) + 0xF0;
dst_pos[1] = ((code_point >> 12) & 0x3F) + 0x80;
dst_pos[2] = ((code_point >> 6) & 0x3F) + 0x80;
dst_pos[3] = (code_point & 0x3F) + 0x80;
dst_pos += 4;
}
}

static bool isValidNumericEntity(const char * src, const char * end, uint32_t & code_point)
{
if (src + strlen("#") >= end)
return false;

if (src[0] != '#' || (end - src > 1 + max_decimal_length_of_unicode_point))
return false;

if (src + 2 < end && (src[1] == 'x' || src[1] == 'X'))
{
src += 2;
for (; src < end; ++src)
{
if (!isHexDigit(*src))
return false;
code_point *= 16;
code_point += unhex(*src);
}
}
else
{
src += 1;
for (; src < end; ++src)
{
if (!isNumericASCII(*src))
return false;
code_point *= 10;
code_point += *src - '0';
}
}

return code_point <= max_legal_unicode_value;
}
};

using FunctionDecodeXMLComponent = FunctionStringToString<FunctionDecodeXMLComponentImpl, DecodeXMLComponentName>;

}

void registerFunctionDecodeXMLComponent(FunctionFactory & factory)
{
factory.registerFunction<FunctionDecodeXMLComponent>();
}
}
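As a sanity check of codePointToUTF8() above: the numeric entity &#8364; (code point 0x20AC, the euro sign) must decode to the three UTF-8 bytes E2 82 AC. A standalone program with the same branching:

```
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t cp = 8364; // 0x20AC, from "&#8364;"
    unsigned char out[4];
    int len = 0;
    if (cp < (1u << 7))
    {
        out[len++] = cp;
    }
    else if (cp < (1u << 11))
    {
        out[len++] = ((cp >> 6) & 0x1F) + 0xC0;
        out[len++] = (cp & 0x3F) + 0x80;
    }
    else if (cp < (1u << 16)) // this branch fires for 0x20AC
    {
        out[len++] = ((cp >> 12) & 0x0F) + 0xE0;
        out[len++] = ((cp >> 6) & 0x3F) + 0x80;
        out[len++] = (cp & 0x3F) + 0x80;
    }
    else
    {
        out[len++] = ((cp >> 18) & 0x07) + 0xF0;
        out[len++] = ((cp >> 12) & 0x3F) + 0x80;
        out[len++] = ((cp >> 6) & 0x3F) + 0x80;
        out[len++] = (cp & 0x3F) + 0x80;
    }
    for (int i = 0; i < len; ++i)
        std::printf("%02X ", out[i]); // prints: E2 82 AC
    std::printf("\n");
}
```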
@ -30,6 +30,10 @@ public:

bool useDefaultImplementationForNulls() const override { return false; }

/// We should never return a LowCardinality result, because we declare that the result is always constant zero
/// (in getResultIfAlwaysReturnsConstantAndHasArguments).
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

String getName() const override
{
return name;
@ -34,6 +34,8 @@ void registerFunctionNormalizeQuery(FunctionFactory &);
void registerFunctionNormalizedQueryHash(FunctionFactory &);
void registerFunctionCountMatches(FunctionFactory &);
void registerFunctionEncodeXMLComponent(FunctionFactory & factory);
void registerFunctionDecodeXMLComponent(FunctionFactory & factory);

#if USE_BASE64
void registerFunctionBase64Encode(FunctionFactory &);

@ -70,6 +72,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionNormalizedQueryHash(factory);
registerFunctionCountMatches(factory);
registerFunctionEncodeXMLComponent(factory);
registerFunctionDecodeXMLComponent(factory);
#if USE_BASE64
registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory);
@ -11,205 +11,144 @@ namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

template <typename A, typename B>
struct TupleHammingDistanceImpl
{
using ResultType = UInt8;

static void NO_INLINE vectorVector(
const PaddedPODArray<A> & a1,
const PaddedPODArray<A> & b1,
const PaddedPODArray<B> & a2,
const PaddedPODArray<B> & b2,
PaddedPODArray<ResultType> & c)
{
size_t size = a1.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1[i], a2[i]) + apply(b1[i], b2[i]);
}

static void NO_INLINE
vectorConstant(const PaddedPODArray<A> & a1, const PaddedPODArray<A> & b1, UInt64 a2, UInt64 b2, PaddedPODArray<ResultType> & c)
{
size_t size = a1.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1[i], a2) + apply(b1[i], b2);
}

static void NO_INLINE
constantVector(UInt64 a1, UInt64 b1, const PaddedPODArray<B> & a2, const PaddedPODArray<B> & b2, PaddedPODArray<ResultType> & c)
{
size_t size = a2.size();
for (size_t i = 0; i < size; ++i)
c[i] = apply(a1, a2[i]) + apply(b1, b2[i]);
}

static ResultType constantConstant(UInt64 a1, UInt64 b1, UInt64 a2, UInt64 b2) { return apply(a1, a2) + apply(b1, b2); }

private:
static inline UInt8 apply(UInt64 a, UInt64 b) { return a != b; }
};

template <typename F>
bool castType(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeInt8,
DataTypeInt16,
DataTypeInt32,
DataTypeInt64,
DataTypeUInt8,
DataTypeUInt16,
DataTypeUInt32,
DataTypeUInt64>(type, std::forward<F>(f));
}

template <typename F>
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
{
return castType(left, [&](const auto & left_) { return castType(right, [&](const auto & right_) { return f(left_, right_); }); });
}

// tupleHammingDistance function: (Tuple(Integer, Integer), Tuple(Integer, Integer))->0/1/2
// in order to avoid code bloating, for non-constant tuple, we make sure that the elements
// in the tuple should have same data type, and for constant tuple, elements can be any integer
// data type, we cast all of them into UInt64
/// tupleHammingDistance function: (Tuple(...), Tuple(...))-> N
/// Return the number of non-equal tuple elements
class FunctionTupleHammingDistance : public IFunction
{
private:
const Context & context;

public:
static constexpr auto name = "tupleHammingDistance";
using ResultType = UInt8;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionTupleHammingDistance>(); }

explicit FunctionTupleHammingDistance(const Context & context_) : context(context_) {}
static FunctionPtr create(const Context & context) { return std::make_shared<FunctionTupleHammingDistance>(context); }

String getName() const override { return name; }

size_t getNumberOfArguments() const override { return 2; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
bool useDefaultImplementationForConstants() const override { return true; }

Columns getTupleElements(const IColumn & column) const
{
if (!isTuple(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isTuple(arguments[1]))
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeUInt8>();
if (const auto * const_column = typeid_cast<const ColumnConst *>(&column))
return convertConstTupleToConstantElements(*const_column);

if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(&column))
{
Columns columns(column_tuple->tupleSize());
for (size_t i = 0; i < columns.size(); ++i)
columns[i] = column_tuple->getColumnPtr(i);
return columns;
}

throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} should be tuples, got {}",
getName(), column.getName());
}

DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());

if (!left_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuples, got {}",
getName(), arguments[0].type->getName());

if (!right_tuple)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuples, got {}",
getName(), arguments[1].type->getName());

const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();

Columns left_elements;
Columns right_elements;
if (arguments[0].column)
left_elements = getTupleElements(*arguments[0].column);
if (arguments[1].column)
right_elements = getTupleElements(*arguments[1].column);

if (left_types.size() != right_types.size())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Expected tuples of the same size as arguments of function {}. Got {} and {}",
getName(), arguments[0].type->getName(), arguments[1].type->getName());

size_t tuple_size = left_types.size();
if (tuple_size == 0)
return std::make_shared<DataTypeUInt8>();

auto compare = FunctionFactory::instance().get("notEquals", context);
auto plus = FunctionFactory::instance().get("plus", context);
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
try
{
ColumnWithTypeAndName left{left_elements.empty() ? nullptr : left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements.empty() ? nullptr : right_elements[i], right_types[i], {}};
auto elem_compare = compare->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_compare->getResultType();
}
catch (DB::Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
}
}

auto res_type = types[0];
for (size_t i = 1; i < tuple_size; ++i)
{
ColumnWithTypeAndName left{res_type, {}};
ColumnWithTypeAndName right{types[i], {}};
auto plus_elem = plus->build({left, right});
res_type = plus_elem->getResultType();
}

return res_type;
}

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & arg1 = arguments[0];
const ColumnWithTypeAndName & arg2 = arguments[1];
const DataTypeTuple & type1 = static_cast<const DataTypeTuple &>(*arg1.type);
const DataTypeTuple & type2 = static_cast<const DataTypeTuple &>(*arg2.type);
const auto & left_elems = type1.getElements();
const auto & right_elems = type2.getElements();
if (left_elems.size() != 2 || right_elems.size() != 2)
throw Exception(
"Illegal column of arguments of function " + getName() + ", tuple should have exactly two elements.",
ErrorCodes::ILLEGAL_COLUMN);
const auto * left_tuple = checkAndGetDataType<DataTypeTuple>(arguments[0].type.get());
const auto * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].type.get());
const auto & left_types = left_tuple->getElements();
const auto & right_types = right_tuple->getElements();
auto left_elements = getTupleElements(*arguments[0].column);
auto right_elements = getTupleElements(*arguments[1].column);

ColumnPtr result_column;
size_t tuple_size = left_elements.size();
if (tuple_size == 0)
return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count);

bool valid = castBothTypes(left_elems[0].get(), right_elems[0].get(), [&](const auto & left, const auto & right)
auto compare = FunctionFactory::instance().get("notEquals", context);
auto plus = FunctionFactory::instance().get("plus", context);
ColumnsWithTypeAndName columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
using LeftDataType = std::decay_t<decltype(left)>;
using RightDataType = std::decay_t<decltype(right)>;
using T0 = typename LeftDataType::FieldType;
using T1 = typename RightDataType::FieldType;
using ColVecT0 = ColumnVector<T0>;
using ColVecT1 = ColumnVector<T1>;
using ColVecResult = ColumnVector<ResultType>;
ColumnWithTypeAndName left{left_elements[i], left_types[i], {}};
ColumnWithTypeAndName right{right_elements[i], right_types[i], {}};
auto elem_compare = compare->build(ColumnsWithTypeAndName{left, right});
columns[i].type = elem_compare->getResultType();
columns[i].column = elem_compare->execute({left, right}, columns[i].type, input_rows_count);
}

using OpImpl = TupleHammingDistanceImpl<T0, T1>;
auto res = columns[0];
for (size_t i = 1; i < tuple_size; ++i)
{
auto plus_elem = plus->build({res, columns[i]});
auto res_type = plus_elem->getResultType();
res.column = plus_elem->execute({res, columns[i]}, res_type, input_rows_count);
res.type = res_type;
}

// we can not useDefaultImplementationForConstants,
// because with that, tupleHammingDistance((10, 300), (10, 20)) does not work,
// since 10 has data type UInt8, and 300 has data type UInt16
if (const ColumnConst * const_col_left = checkAndGetColumnConst<ColumnTuple>(arg1.column.get()))
{
if (const ColumnConst * const_col_right = checkAndGetColumnConst<ColumnTuple>(arg2.column.get()))
{
auto cols1 = convertConstTupleToConstantElements(*const_col_left);
auto cols2 = convertConstTupleToConstantElements(*const_col_right);
Field a1, b1, a2, b2;
cols1[0]->get(0, a1);
cols1[1]->get(0, b1);
cols2[0]->get(0, a2);
cols2[1]->get(0, b2);
auto res = OpImpl::constantConstant(a1.get<UInt64>(), b1.get<UInt64>(), a2.get<UInt64>(), b2.get<UInt64>());
result_column = DataTypeUInt8().createColumnConst(const_col_left->size(), toField(res));
return true;
}
}

typename ColVecResult::MutablePtr col_res = nullptr;
col_res = ColVecResult::create();
auto & vec_res = col_res->getData();
vec_res.resize(input_rows_count);
// constant tuple - non-constant tuple
if (const ColumnConst * const_col_left = checkAndGetColumnConst<ColumnTuple>(arg1.column.get()))
{
if (const ColumnTuple * col_right = typeid_cast<const ColumnTuple *>(arg2.column.get()))
{
auto const_cols = convertConstTupleToConstantElements(*const_col_left);
Field a1, b1;
const_cols[0]->get(0, a1);
const_cols[1]->get(0, b1);
auto col_r1 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(0));
auto col_r2 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(1));
if (col_r1 && col_r2)
OpImpl::constantVector(a1.get<UInt64>(), b1.get<UInt64>(), col_r1->getData(), col_r2->getData(), vec_res);
else
return false;
}
else
return false;
}
else if (const ColumnTuple * col_left = typeid_cast<const ColumnTuple *>(arg1.column.get()))
{
auto col_l1 = checkAndGetColumn<ColVecT0>(&col_left->getColumn(0));
auto col_l2 = checkAndGetColumn<ColVecT0>(&col_left->getColumn(1));
if (col_l1 && col_l2)
{
// non-constant tuple - constant tuple
if (const ColumnConst * const_col_right = checkAndGetColumnConst<ColumnTuple>(arg2.column.get()))
{
auto const_cols = convertConstTupleToConstantElements(*const_col_right);
Field a2, b2;
const_cols[0]->get(0, a2);
const_cols[1]->get(0, b2);
OpImpl::vectorConstant(col_l1->getData(), col_l2->getData(), a2.get<UInt64>(), a2.get<UInt64>(), vec_res);
}
// non-constant tuple - non-constant tuple
else if (const ColumnTuple * col_right = typeid_cast<const ColumnTuple *>(arg2.column.get()))
{
auto col_r1 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(0));
auto col_r2 = checkAndGetColumn<ColVecT1>(&col_right->getColumn(1));
if (col_r1 && col_r2)
OpImpl::vectorVector(col_l1->getData(), col_l2->getData(), col_r1->getData(), col_r2->getData(), vec_res);
else
return false;
}
else
return false;
}
else
return false;
}
else
return false;
result_column = std::move(col_res);
return true;
});
if (!valid)
throw Exception(getName() + "'s arguments do not match the expected data types", ErrorCodes::ILLEGAL_COLUMN);

return result_column;
return res.column;
}
};
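The rewritten function is exactly per-element notEquals followed by a plus reduction, for tuples of any size. The same semantics in a self-contained C++ sketch over std::tuple (illustrative, not the ClickHouse implementation):

```
#include <cstddef>
#include <iostream>
#include <tuple>

// Hamming distance between two same-size tuples: count of element-wise
// inequalities, i.e. notEquals on each pair of elements, then a plus fold.
template <typename... L, typename... R>
std::size_t tupleHammingDistance(const std::tuple<L...> & a, const std::tuple<R...> & b)
{
    static_assert(sizeof...(L) == sizeof...(R), "tuples must have the same size");
    return std::apply([&](const L &... xs) {
        return std::apply([&](const R &... ys) {
            return (static_cast<std::size_t>(!(xs == ys)) + ... + 0);
        }, b);
    }, a);
}

int main()
{
    std::cout << tupleHammingDistance(std::tuple{1, 300}, std::tuple{1, 20}) << '\n';    // 1
    std::cout << tupleHammingDistance(std::tuple{1, 2, 3}, std::tuple{3, 2, 1}) << '\n'; // 2
}
```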
@ -221,6 +221,7 @@ SRCS(
currentUser.cpp
dateDiff.cpp
date_trunc.cpp
decodeXMLComponent.cpp
decrypt.cpp
defaultValueOfArgumentType.cpp
defaultValueOfTypeName.cpp
@ -61,7 +61,11 @@ template <typename T> WriteBuffer & operator<< (QuoteManipWriteBuffer buf,
template <typename T> WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const T & x) { writeDoubleQuoted(x, buf.get()); return buf; }
template <typename T> WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const T & x) { writeBinary(x, buf.get()); return buf; }

inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeAnyEscapedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const std::string_view & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const StringRef & x) { writeEscapedString(x, buf); return buf; }
inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeEscapedString(x, strlen(x), buf); return buf; }

inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'\''>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'"'>(x, x + strlen(x), buf.get()); return buf; }
inline WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const char * x) { writeStringBinary(x, buf.get()); return buf; }
@ -4,13 +4,11 @@ namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
extern const int LOGICAL_ERROR;
}

PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= DBMS_DEFAULT_BUFFER_SIZE*/,
size_t unread_limit_ /* = default_limit*/)
: BufferWithOwnMemory(start_size_), sub_buf(sub_buf_), unread_limit(unread_limit_)
PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= DBMS_DEFAULT_BUFFER_SIZE*/)
: BufferWithOwnMemory(start_size_), sub_buf(sub_buf_)
{
padded &= sub_buf.isPadded();
/// Read from sub-buffer

@ -191,8 +189,6 @@ void PeekableReadBuffer::checkStateCorrect() const
}
if (currentlyReadFromOwnMemory() && !peeked_size)
throw DB::Exception("Pos in empty own buffer", ErrorCodes::LOGICAL_ERROR);
if (unread_limit < memory.size())
throw DB::Exception("Size limit exceed", ErrorCodes::LOGICAL_ERROR);
}

void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)

@ -222,16 +218,11 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
}
else
{
if (unread_limit < new_size)
throw DB::Exception("PeekableReadBuffer: Memory limit exceed", ErrorCodes::MEMORY_LIMIT_EXCEEDED);

size_t pos_offset = pos - memory.data();

size_t new_size_amortized = memory.size() * 2;
if (new_size_amortized < new_size)
new_size_amortized = new_size;
else if (unread_limit < new_size_amortized)
new_size_amortized = unread_limit;
memory.resize(new_size_amortized);

if (need_update_checkpoint)
@ -20,8 +20,7 @@ class PeekableReadBuffer : public BufferWithOwnMemory<ReadBuffer>
|
||||
{
|
||||
friend class PeekableReadBufferCheckpoint;
|
||||
public:
|
||||
explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
size_t unread_limit_ = 16 * DBMS_DEFAULT_BUFFER_SIZE);
|
||||
explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = DBMS_DEFAULT_BUFFER_SIZE);
|
||||
|
||||
~PeekableReadBuffer() override;
|
||||
|
||||
@ -95,7 +94,6 @@ private:
|
||||
|
||||
|
||||
ReadBuffer & sub_buf;
|
||||
const size_t unread_limit;
|
||||
size_t peeked_size = 0;
|
||||
Position checkpoint = nullptr;
|
||||
bool checkpoint_in_own_memory = false;
|
||||
|
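The PeekableReadBuffer change above removes the unread_limit cap: the constructor loses its second argument, and the resize path keeps only the amortized doubling. A standalone sketch of the surviving growth arithmetic, assuming nothing beyond the standard library:

    #include <cstddef>
    #include <vector>

    // Amortized growth as in resizeOwnMemoryIfNecessary: double the buffer,
    // but never allocate less than what is needed right now.
    static void reserve_amortized(std::vector<char> & memory, size_t new_size)
    {
        size_t new_size_amortized = memory.size() * 2;
        if (new_size_amortized < new_size)
            new_size_amortized = new_size;
        memory.resize(new_size_amortized);
    }

    int main()
    {
        std::vector<char> memory(16);
        reserve_amortized(memory, 20);    // 32: doubling wins
        reserve_amortized(memory, 1000);  // 1000: the requested size wins
    }

Doubling keeps the total cost of repeated appends linear; the explicit maximum covers a single oversized request.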
@ -483,6 +483,10 @@ inline void writeEscapedString(const StringRef & ref, WriteBuffer & buf)
writeEscapedString(ref.data, ref.size, buf);
}

inline void writeEscapedString(const std::string_view & ref, WriteBuffer & buf)
{
writeEscapedString(ref.data(), ref.size(), buf);
}

template <char quote_character>
void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & buf)
@ -512,17 +516,31 @@ inline void writeQuotedString(const String & s, WriteBuffer & buf)
writeAnyQuotedString<'\''>(s, buf);
}

inline void writeQuotedString(const StringRef & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref, buf);
}

inline void writeQuotedString(const std::string_view & ref, WriteBuffer & buf)
{
writeAnyQuotedString<'\''>(ref.data(), ref.data() + ref.size(), buf);
}

inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}

inline void writeDoubleQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s, buf);
}

inline void writeDoubleQuotedString(const std::string_view & s, WriteBuffer & buf)
{
writeAnyQuotedString<'"'>(s.data(), s.data() + s.size(), buf);
}

/// Outputs a string in backquotes.
inline void writeBackQuotedString(const StringRef & s, WriteBuffer & buf)
{
@ -901,6 +919,7 @@ writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); }

inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const std::string_view & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
inline void writeBinary(const DummyUInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
@ -1001,6 +1020,10 @@ writeQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }

inline void writeQuoted(const String & x, WriteBuffer & buf) { writeQuotedString(x, buf); }

inline void writeQuoted(const std::string_view & x, WriteBuffer & buf) { writeQuotedString(x, buf); }

inline void writeQuoted(const StringRef & x, WriteBuffer & buf) { writeQuotedString(x, buf); }

inline void writeQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('\'', buf);
@ -1043,6 +1066,10 @@ writeDoubleQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); }

inline void writeDoubleQuoted(const String & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }

inline void writeDoubleQuoted(const std::string_view & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }

inline void writeDoubleQuoted(const StringRef & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); }

inline void writeDoubleQuoted(const LocalDate & x, WriteBuffer & buf)
{
writeChar('"', buf);
src/IO/tests/gtest_manip.cpp (new file, 82 lines)
@ -0,0 +1,82 @@
#include <gtest/gtest.h>

#include <string>
#include <type_traits>
#include <common/StringRef.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>

using namespace DB;

template <typename T, typename U>
void checkString(const T & str, U manip, const std::string & expected)
{
WriteBufferFromOwnString buf;

buf << manip << str;
EXPECT_EQ(expected, buf.str()) << "str type:" << typeid(str).name();
}

TEST(OperatorsManipTest, EscapingTest)
{
checkString("Hello 'world'", escape, "Hello \\'world\\'");
checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); // NOLINT

std::string s1 = "Hello 'world'";
checkString(s1, escape, "Hello \\'world\\'");
std::string s2 = "Hello \\world\\";
checkString(s2, escape, "Hello \\\\world\\\\"); // NOLINT

std::string_view sv1 = s1;
checkString(sv1, escape, "Hello \\'world\\'");
std::string_view sv2 = s2;
checkString(sv2, escape, "Hello \\\\world\\\\"); // NOLINT

StringRef sr1 = s1;
checkString(sr1, escape, "Hello \\'world\\'");
StringRef sr2 = s2;
checkString(sr2, escape, "Hello \\\\world\\\\"); // NOLINT
}

TEST(OperatorsManipTest, QuouteTest)
{
checkString("Hello 'world'", quote, "'Hello \\'world\\''");

std::string s1 = "Hello 'world'";
checkString(s1, quote, "'Hello \\'world\\''");

std::string_view sv1 = s1;
checkString(sv1, quote, "'Hello \\'world\\''");

StringRef sr1 = s1;
checkString(sr1, quote, "'Hello \\'world\\''");
}

TEST(OperatorsManipTest, DoubleQuouteTest)
{
checkString("Hello 'world'", double_quote, "\"Hello 'world'\"");

std::string s1 = "Hello 'world'";
checkString(s1, double_quote, "\"Hello 'world'\"");

std::string_view sv1 = s1;
checkString(sv1, double_quote, "\"Hello 'world'\"");

StringRef sr1 = s1;
checkString(sr1, double_quote, "\"Hello 'world'\"");
}

TEST(OperatorsManipTest, binary)
{
checkString("Hello", binary, "\x5Hello");

std::string s1 = "Hello";
checkString(s1, binary, "\x5Hello");

std::string_view sv1 = s1;
checkString(sv1, binary, "\x5Hello");

StringRef sr1 = s1;
checkString(sr1, binary, "\x5Hello");
}
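The "\x5Hello" expectations in the binary test read oddly until you see the encoding: the binary manipulator writes a length prefix before the bytes, and for short strings that prefix is a single byte (here 0x05 for "Hello"). A standalone sketch of a variable-length length prefix of this kind (the actual on-wire format belongs to ClickHouse's writeStringBinary; this only approximates the idea):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <string_view>

    // LEB128-style length prefix followed by the raw bytes; for strings shorter
    // than 128 bytes the prefix is a single byte, hence "\x5Hello" in the test.
    static void write_string_binary(std::string & out, std::string_view s)
    {
        uint64_t n = s.size();
        do
        {
            uint8_t byte = n & 0x7F;
            n >>= 7;
            if (n)
                byte |= 0x80;   // continuation bit: more length bytes follow
            out.push_back(static_cast<char>(byte));
        } while (n);
        out.append(s);
    }

    int main()
    {
        std::string out;
        write_string_binary(out, "Hello");
        std::cout << (out == "\x5Hello") << '\n';   // prints 1
    }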
@ -9,7 +9,6 @@
namespace DB::ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int MEMORY_LIMIT_EXCEEDED;
}

static void readAndAssert(DB::ReadBuffer & buf, const char * str)
@ -40,7 +39,7 @@ try
DB::ReadBufferFromString b4(s4);

DB::ConcatReadBuffer concat({&b1, &b2, &b3, &b4});
DB::PeekableReadBuffer peekable(concat, 0, 16);
DB::PeekableReadBuffer peekable(concat, 0);

ASSERT_TRUE(!peekable.eof());
assertAvailable(peekable, "0123456789");
@ -48,6 +47,8 @@ try
DB::PeekableReadBufferCheckpoint checkpoint{peekable};
readAndAssert(peekable, "01234");
}

#ifndef ABORT_ON_LOGICAL_ERROR
bool exception = false;
try
{
@ -60,6 +61,7 @@ try
exception = true;
}
ASSERT_TRUE(exception);
#endif
assertAvailable(peekable, "56789");

readAndAssert(peekable, "56");
@ -70,19 +72,10 @@ try
peekable.dropCheckpoint();
assertAvailable(peekable, "789");

exception = false;
try
{
DB::PeekableReadBufferCheckpoint checkpoint{peekable, true};
peekable.ignore(30);
peekable.ignore(20);
}
catch (DB::Exception & e)
{
if (e.code() != DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED)
throw;
exception = true;
}
ASSERT_TRUE(exception);
assertAvailable(peekable, "789qwertyuiop");

readAndAssert(peekable, "789qwertyu");
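The test above exercises checkpoint save/rollback semantics: a checkpoint pins the current read position, and rolling back returns to it even when the data spanned several underlying buffers. A toy standalone model of just the position bookkeeping (the real class also copies the peeked bytes into its own memory, and PeekableReadBufferCheckpoint manages this in RAII form):

    #include <cassert>
    #include <cstddef>
    #include <string>

    struct MiniBuffer
    {
        std::string data;
        size_t pos = 0;
        size_t checkpoint = 0;

        void setCheckpoint() { checkpoint = pos; }
        void rollbackToCheckpoint() { pos = checkpoint; }
        char read() { return data[pos++]; }
    };

    int main()
    {
        MiniBuffer buf{"0123456789"};
        buf.setCheckpoint();
        assert(buf.read() == '0');
        assert(buf.read() == '1');
        buf.rollbackToCheckpoint();   // the two reads are undone
        assert(buf.read() == '0');
    }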
@ -436,12 +436,6 @@ void ActionsDAG::project(const NamesWithAliases & projection)
settings.projected_output = true;
}

void ActionsDAG::removeColumn(const std::string & column_name)
{
auto & node = getNode(column_name);
index.remove(&node);
}

bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
{
if (index.contains(column_name))
@ -550,6 +544,11 @@ std::string ActionsDAG::dumpDAG() const
out << "\n";
}

out << "Index:";
for (const auto * node : index)
out << ' ' << map[node];
out << '\n';

return out.str();
}

@ -698,7 +697,8 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
/// Will store merged result in `first`.

/// This map contains nodes which should be removed from `first` index, cause they are used as inputs for `second`.
std::unordered_set<Node *> removed_first_result;
/// The second element is the number of removes (cause one node may be repeated several times in result).
std::unordered_map<Node *, size_t> removed_first_result;
/// Map inputs of `second` to nodes of `first`.
std::unordered_map<Node *, Node *> inputs_map;

@ -723,7 +723,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
else
{
inputs_map[node] = it->second.front();
removed_first_result.emplace(it->second.front());
removed_first_result[it->second.front()] += 1;
it->second.pop_front();
}
}
@ -767,8 +767,12 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
auto cur = it;
++it;

if (removed_first_result.count(*cur))
auto jt = removed_first_result.find(*cur);
if (jt != removed_first_result.end() && jt->second > 0)
{
first.index.remove(cur);
--jt->second;
}
}

for (auto * node : second.index)
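The merge() fix above replaces a set of nodes-to-remove with a map from node to removal count: when one node occurs several times in the result index, only as many occurrences are dropped as were actually consumed as inputs by the second DAG. The counted-removal loop in isolation, with strings standing in for Node pointers:

    #include <cstddef>
    #include <iostream>
    #include <list>
    #include <string>
    #include <unordered_map>

    int main()
    {
        // Index with a repeated entry, as when one node appears twice in a result.
        std::list<std::string> index {"a", "b", "a"};

        // One removal was "consumed", so only one of the two "a"s must go.
        std::unordered_map<std::string, size_t> removed {{"a", 1}};

        for (auto it = index.begin(); it != index.end();)
        {
            auto cur = it++;
            auto jt = removed.find(*cur);
            if (jt != removed.end() && jt->second > 0)
            {
                index.erase(cur);
                --jt->second;   // a set-based check would have erased both "a"s
            }
        }

        for (const auto & name : index)
            std::cout << name << ' ';   // prints: b a
    }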
@ -80,7 +80,7 @@ public:
};

/// Index is used to:
/// * find Node buy it's result_name
/// * find Node by it's result_name
/// * specify order of columns in result
/// It represents a set of available columns.
/// Removing of column from index is equivalent to removing of column from final result.
@ -133,16 +133,6 @@ public:
insert(node);
}

void remove(Node * node)
{
auto it = map.find(node->result_name);
if (it != map.end())
return;

list.erase(it->second);
map.erase(it);
}

void remove(std::list<Node *>::iterator it)
{
auto map_it = map.find((*it)->result_name);
@ -219,8 +209,6 @@ public:
/// Add alias actions and remove unused columns from index. Also specify result columns order in index.
void project(const NamesWithAliases & projection);

/// Removes column from index.
void removeColumn(const std::string & column_name);
/// If column is not in index, try to find it in nodes and insert back into index.
bool tryRestoreColumn(const std::string & column_name);

src/Interpreters/ColumnAliasesVisitor.cpp (new file, 100 lines)
@ -0,0 +1,100 @@
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>

namespace DB
{

bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (const auto * f = node->as<ASTFunction>())
{
/// "lambda" visits children itself.
if (f->name == "lambda")
return false;
}

return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>()
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
}

void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
// If it's select query, only replace filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
if (query->where())
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());

return;
}

if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}

if (auto * node = ast->as<ASTIdentifier>())
{
visit(*node, ast, data);
return;
}
}

void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
{
/// Do not add formal parameters of the lambda expression
if (node.name == "lambda")
{
Names local_aliases;
auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node);
for (const auto & name : names_from_lambda)
{
if (data.private_aliases.insert(name).second)
{
local_aliases.push_back(name);
}
}
/// visit child with masked local aliases
Visitor(data).visit(node.arguments->children[1]);
for (const auto & name : local_aliases)
data.private_aliases.erase(name);
}
}

void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
if (auto column_name = IdentifierSemantic::getColumnName(node))
{
if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
return;

const auto & col = data.columns.get(*column_name);
if (col.default_desc.kind == ColumnDefaultKind::Alias)
{
ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);
auto str = queryToString(ast);
// revisit ast to track recursive alias columns
Visitor(data).visit(ast);
}
}
}

}
src/Interpreters/ColumnAliasesVisitor.h (new file, 81 lines)
@ -0,0 +1,81 @@
#pragma once

#include <Core/Names.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/ColumnsDescription.h>

namespace DB
{

class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class IDataType;
class ASTFunction;
class ASTIdentifier;
using DataTypePtr = std::shared_ptr<const IDataType>;

/// Visits AST nodes to rewrite alias columns in a query.
/// Currently it is used in only the 3 ways listed below.

/// For example:
// CREATE TABLE test_table
// (
// `timestamp` DateTime,
// `value` UInt64,
// `day` Date ALIAS toDate(timestamp),
// `day1` Date ALIAS day + 1,
// `day2` Date ALIAS day1 + 1,
// `time` DateTime ALIAS timestamp
// ) ENGINE = MergeTree
// PARTITION BY toYYYYMMDD(timestamp)
// ORDER BY timestamp SETTINGS index_granularity = 1;

/// 1. Rewrite the filters in a query when optimize_respect_aliases is enabled;
/// this helps with `optimize_trivial_count`, partition pruning in `KeyCondition`, and secondary indexes.
/// e.g. select max(value) from test_table where day2 = today(); the filter becomes ((toDate(timestamp) + 1) + 1) = today().

/// 2. Resolve alias-on-alias for the `required_columns` extracted in `InterpreterSelectQuery.cpp`; this helps to get all physical columns the query depends on.
/// e.g. select day2 from test_table: `required_columns` can be derived from the temporarily rewritten AST `((toDate(timestamp) + 1) + 1)`.

/// 3. Help `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`:
/// for queries with alias columns in `orderBy` and `groupBy`, those ASTs do not change,
/// but we generate temporary ASTs and temporary Actions to get the `InputOrderInfo`.
/// e.g. select day1 from test_table order by day1;

class ColumnAliasesMatcher
{
public:
using Visitor = InDepthNodeVisitor<ColumnAliasesMatcher, false>;

struct Data
{
const ColumnsDescription & columns;

/// forbidden_columns come from ARRAY JOIN; we can't rewrite alias columns involved in an ARRAY JOIN.
/// Do not analyze joined columns.
/// They may have aliases and come to the description as is.
const NameSet & forbidden_columns;
const Context & context;

/// private_aliases come from lambdas, so these are local names.
NameSet private_aliases;

Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
: columns(columns_)
, forbidden_columns(forbidden_columns_)
, context(context_)
{}
};

static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);

private:
static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data);
static void visit(ASTFunction & node, ASTPtr & ast, Data & data);
};

using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;

}

@ -26,7 +26,6 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
@ -429,7 +428,6 @@ struct ContextShared
if (system_logs)
system_logs->shutdown();

TemporaryLiveViewCleaner::shutdown();
DatabaseCatalog::shutdown();

/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
@ -493,7 +491,6 @@ Context Context::createGlobal(ContextShared * shared)
void Context::initGlobal()
{
DatabaseCatalog::init(*this);
TemporaryLiveViewCleaner::init(*this);
}

SharedContextHolder Context::createShared()
@ -13,7 +13,6 @@
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/StorageDistributed.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
@ -21,7 +20,6 @@
#include <Interpreters/Context.h>
#include <Access/AccessRightsElement.h>
#include <Access/ContextAccess.h>
#include <Common/DNSResolver.h>
#include <Common/Macros.h>
#include <Common/setThreadName.h>
#include <Common/Stopwatch.h>
@ -34,7 +32,6 @@
#include <DataTypes/DataTypeString.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Poco/Timestamp.h>
#include <Poco/Net/NetException.h>
#include <common/sleep.h>
#include <common/getFQDNOrHostName.h>
#include <pcg_random.hpp>
@ -62,107 +59,46 @@ namespace ErrorCodes
}

namespace
String DDLLogEntry::toString()
{
WriteBufferFromOwnString wb;

struct HostID
{
String host_name;
UInt16 port;
Strings host_id_strings(hosts.size());
std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);

HostID() = default;

explicit HostID(const Cluster::Address & address)
: host_name(address.host_name), port(address.port) {}

static HostID fromString(const String & host_port_str)
{
HostID res;
std::tie(res.host_name, res.port) = Cluster::Address::fromString(host_port_str);
return res;
}

String toString() const
{
return Cluster::Address::toString(host_name, port);
}

String readableString() const
{
return host_name + ":" + DB::toString(port);
}

bool isLocalAddress(UInt16 clickhouse_port) const
{
try
{
return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
}
catch (const Poco::Net::NetException &)
{
/// Avoid "Host not found" exceptions
return false;
}
}

static String applyToString(const HostID & host_id)
{
return host_id.toString();
}
};
auto version = CURRENT_VERSION;
wb << "version: " << version << "\n";
wb << "query: " << escape << query << "\n";
wb << "hosts: " << host_id_strings << "\n";
wb << "initiator: " << initiator << "\n";

return wb.str();
}

struct DDLLogEntry
void DDLLogEntry::parse(const String & data)
{
String query;
std::vector<HostID> hosts;
String initiator; // optional
ReadBufferFromString rb(data);

static constexpr int CURRENT_VERSION = 1;
int version;
rb >> "version: " >> version >> "\n";

String toString()
{
WriteBufferFromOwnString wb;
if (version != CURRENT_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}", version);

Strings host_id_strings(hosts.size());
std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);
Strings host_id_strings;
rb >> "query: " >> escape >> query >> "\n";
rb >> "hosts: " >> host_id_strings >> "\n";

auto version = CURRENT_VERSION;
wb << "version: " << version << "\n";
wb << "query: " << escape << query << "\n";
wb << "hosts: " << host_id_strings << "\n";
wb << "initiator: " << initiator << "\n";
if (!rb.eof())
rb >> "initiator: " >> initiator >> "\n";
else
initiator.clear();

return wb.str();
}
assertEOF(rb);

void parse(const String & data)
{
ReadBufferFromString rb(data);

int version;
rb >> "version: " >> version >> "\n";

if (version != CURRENT_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}", version);

Strings host_id_strings;
rb >> "query: " >> escape >> query >> "\n";
rb >> "hosts: " >> host_id_strings >> "\n";

if (!rb.eof())
rb >> "initiator: " >> initiator >> "\n";
else
initiator.clear();

assertEOF(rb);

hosts.resize(host_id_strings.size());
std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
}
};
hosts.resize(host_id_strings.size());
std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
}

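DDLLogEntry::toString()/parse() above define a small line-oriented text format for the entry stored in ZooKeeper. Roughly, a serialized version-1 entry looks like the output of this standalone sketch (the query escaping and the exact host-list encoding are simplified here):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main()
    {
        std::string query = "CREATE TABLE t ON CLUSTER c (x UInt8) ENGINE = Memory";
        std::vector<std::string> hosts {"host1:9000", "host2:9000"};
        std::string initiator = "host1:9000";

        std::ostringstream wb;
        wb << "version: " << 1 << '\n';
        wb << "query: " << query << '\n';     // the real format escapes the query
        wb << "hosts: ";
        for (const auto & h : hosts)
            wb << '\'' << h << "' ";          // host list encoding approximated
        wb << '\n';
        wb << "initiator: " << initiator << '\n';

        std::cout << wb.str();
    }

parse() reads the same lines back in order, tolerates a missing initiator line for compatibility, and rejects any version other than CURRENT_VERSION.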
struct DDLTask
@ -315,7 +251,7 @@ DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context &
: context(context_)
, log(&Poco::Logger::get("DDLWorker"))
, pool_size(pool_size_)
, worker_pool(pool_size_)
, worker_pool(std::make_unique<ThreadPool>(pool_size))
{
CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, 0);
last_tasks.reserve(pool_size);
@ -352,7 +288,7 @@ DDLWorker::~DDLWorker()
stop_flag = true;
queue_updated_event->set();
cleanup_event->set();
worker_pool.wait();
worker_pool.reset();
main_thread.join();
cleanup_thread.join();
}
@ -517,7 +453,7 @@ void DDLWorker::scheduleTasks()

if (!already_processed)
{
worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]()
worker_pool->scheduleOrThrowOnError([this, task_ptr = task.release()]()
{
setThreadName("DDLWorkerExec");
enqueueTask(DDLTaskPtr(task_ptr));
@ -1138,6 +1074,17 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)

void DDLWorker::runMainThread()
{
auto reset_state = [&](bool reset_pool = true)
{
/// It will wait for all threads in pool to finish and will not rethrow exceptions (if any).
/// We create new thread pool to forget previous exceptions.
if (reset_pool)
worker_pool = std::make_unique<ThreadPool>(pool_size);
/// Clear other in-memory state, like server just started.
last_tasks.clear();
max_id = 0;
};

setThreadName("DDLWorker");
LOG_DEBUG(log, "Started DDLWorker thread");

@ -1153,7 +1100,12 @@ void DDLWorker::runMainThread()
catch (const Coordination::Exception & e)
{
if (!Coordination::isHardwareError(e.code))
throw; /// A logical error.
{
/// A logical error.
LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.",getCurrentExceptionMessage(true));
reset_state(false);
assert(false); /// Catch such failures in tests with debug build
}

tryLogCurrentException(__PRETTY_FUNCTION__);

@ -1162,8 +1114,8 @@ void DDLWorker::runMainThread()
}
catch (...)
{
tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
return;
tryLogCurrentException(log, "Cannot initialize DDL queue.");
reset_state(false);
}
}
while (!initialized && !stop_flag);
@ -1192,14 +1144,14 @@ void DDLWorker::runMainThread()
}
else
{
LOG_ERROR(log, "Unexpected ZooKeeper error: {}. Terminating.", getCurrentExceptionMessage(true));
return;
LOG_ERROR(log, "Unexpected ZooKeeper error: {}", getCurrentExceptionMessage(true));
reset_state();
}
}
catch (...)
{
tryLogCurrentException(log, "Unexpected error, will terminate:");
return;
tryLogCurrentException(log, "Unexpected error:");
reset_state();
}
}
}
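The recovery change above hinges on worker_pool becoming a std::unique_ptr<ThreadPool>: reset_state() can replace the pool wholesale, which waits out in-flight tasks and forgets any exception the old pool remembered, so the worker retries instead of terminating. A simplified standalone illustration of the replace-the-pool idea (std::async stands in for the real ThreadPool):

    #include <functional>
    #include <future>
    #include <memory>
    #include <vector>

    // Replacing the pool object waits for queued work (the destructor joins)
    // and discards remembered failures instead of rethrowing them.
    struct NaivePool
    {
        std::vector<std::future<void>> tasks;

        void schedule(std::function<void()> f)
        {
            tasks.push_back(std::async(std::launch::async, std::move(f)));
        }

        ~NaivePool()
        {
            for (auto & t : tasks)
                t.wait();   // wait only; get() would rethrow stored exceptions
        }
    };

    int main()
    {
        auto pool = std::make_unique<NaivePool>();
        pool->schedule([] { /* work that may throw */ });
        pool = std::make_unique<NaivePool>();   // old pool drained, its state forgotten
    }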
@ -1,12 +1,15 @@
#pragma once

#include <DataStreams/BlockIO.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <DataStreams/BlockIO.h>
#include <Storages/IStorage_fwd.h>
#include <Poco/Net/NetException.h>
#include <Common/CurrentThread.h>
#include <Common/DNSResolver.h>
#include <Common/ThreadPool.h>
#include <Common/isLocalAddress.h>
#include <common/logger_useful.h>
#include <Storages/IStorage.h>

#include <atomic>
#include <chrono>
@ -16,24 +19,80 @@

namespace zkutil
{
class ZooKeeper;
class ZooKeeper;
}

namespace DB
{

class Context;
class ASTAlterQuery;
class AccessRightsElements;
struct DDLLogEntry;

struct HostID
{
String host_name;
UInt16 port;

HostID() = default;

explicit HostID(const Cluster::Address & address) : host_name(address.host_name), port(address.port) { }

static HostID fromString(const String & host_port_str)
{
HostID res;
std::tie(res.host_name, res.port) = Cluster::Address::fromString(host_port_str);
return res;
}

String toString() const { return Cluster::Address::toString(host_name, port); }

String readableString() const { return host_name + ":" + DB::toString(port); }

bool isLocalAddress(UInt16 clickhouse_port) const
{
try
{
return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
}
catch (const Poco::Net::NetException &)
{
/// Avoid "Host not found" exceptions
return false;
}
}

static String applyToString(const HostID & host_id) { return host_id.toString(); }
};

struct DDLLogEntry
{
String query;
std::vector<HostID> hosts;
String initiator; // optional

static constexpr int CURRENT_VERSION = 1;

public:
String toString();
void parse(const String & data);
};

struct DDLTask;
using DDLTaskPtr = std::unique_ptr<DDLTask>;

/// Pushes distributed DDL query to the queue
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(
const ASTPtr & query_ptr,
const Context & context,
const AccessRightsElements & query_requires_access,
bool query_requires_grant_option = false);
BlockIO executeDDLQueryOnCluster(
const ASTPtr & query_ptr,
const Context & context,
AccessRightsElements && query_requires_access,
bool query_requires_grant_option = false);

class DDLWorker
@ -127,7 +186,7 @@ private:

/// Size of the pool for query execution.
size_t pool_size = 1;
ThreadPool worker_pool;
std::unique_ptr<ThreadPool> worker_pool;

/// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
Int64 cleanup_delay_period = 60; // minute (in seconds)
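HostID above, now moved into the public header, is essentially a host:port value object with string round-tripping. A self-contained approximation of fromString()/toString() using a plain rfind-based split instead of Cluster::Address (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <string>

    struct MiniHostID
    {
        std::string host_name;
        uint16_t port = 0;

        static MiniHostID fromString(const std::string & s)
        {
            auto colon = s.rfind(':');   // real code delegates to Cluster::Address
            return {s.substr(0, colon), static_cast<uint16_t>(std::stoi(s.substr(colon + 1)))};
        }

        std::string toString() const { return host_name + ":" + std::to_string(port); }
    };

    int main()
    {
        auto id = MiniHostID::fromString("host1:9000");
        assert(id.toString() == "host1:9000");   // round trip is lossless
    }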
@ -8,6 +8,7 @@
#include <Poco/File.h>
#include <Common/quoteString.h>
#include <Storages/StorageMemory.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/formatAST.h>
#include <IO/ReadHelpers.h>
@ -148,10 +149,16 @@ void DatabaseCatalog::loadDatabases()
std::lock_guard lock{tables_marked_dropped_mutex};
if (!tables_marked_dropped.empty())
(*drop_task)->schedule();

/// Another background thread which drops temporary LiveViews.
/// We should start it after loadMarkedAsDroppedTables() to avoid race condition.
TemporaryLiveViewCleaner::instance().startupIfNecessary();
}

void DatabaseCatalog::shutdownImpl()
{
TemporaryLiveViewCleaner::shutdown();

if (drop_task)
(*drop_task)->deactivate();

@ -524,6 +531,7 @@ std::unique_ptr<DatabaseCatalog> DatabaseCatalog::database_catalog;
DatabaseCatalog::DatabaseCatalog(Context & global_context_)
: global_context(global_context_), log(&Poco::Logger::get("DatabaseCatalog"))
{
TemporaryLiveViewCleaner::init(global_context);
}

DatabaseCatalog & DatabaseCatalog::init(Context & global_context_)

@ -62,7 +62,7 @@ public:

using Actions = std::vector<Action>;

/// This map helps to find input position bu it's name.
/// This map helps to find input position by it's name.
/// Key is a view to input::result_name.
/// Result is a list because it is allowed for inputs to have same names.
using NameToInputMap = std::unordered_map<std::string_view, std::list<size_t>>;
@ -87,6 +87,7 @@ public:
const Actions & getActions() const { return actions; }
const std::list<Node> & getNodes() const { return actions_dag->getNodes(); }
const ActionsDAG & getActionsDAG() const { return *actions_dag; }
const ColumnNumbers & getResultPositions() const { return result_positions; }

/// Get a list of input columns.
Names getRequiredColumns() const;

@ -1489,23 +1489,6 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
columns_to_remove.insert(step.required_output[i]);
}

if (!columns_to_remove.empty())
{
auto columns = prewhere_info->prewhere_actions->getResultColumns();

auto remove_actions = std::make_shared<ActionsDAG>();
for (const auto & column : columns)
{
if (columns_to_remove.count(column.name))
{
remove_actions->addInput(column);
remove_actions->removeColumn(column.name);
}
}

prewhere_info->remove_columns_actions = std::move(remove_actions);
}

columns_to_remove_after_prewhere = std::move(columns_to_remove);
}
else if (hasFilter())

@ -53,6 +53,13 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}, context);
}
}
else if ((exists_query = query_ptr->as<ASTExistsViewQuery>()))
{
String database = context.resolveDatabase(exists_query->database);
context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table);
auto tbl = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context);
result = tbl != nullptr && tbl->isView();
}
else if ((exists_query = query_ptr->as<ASTExistsDatabaseQuery>()))
{
String database = context.resolveDatabase(exists_query->database);

@ -156,6 +156,10 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, Context &
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsViewQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);
}
else if (query->as<ASTExistsDictionaryQuery>())
{
return std::make_unique<InterpreterExistsQuery>(query, context);

@ -106,7 +106,7 @@ Block InterpreterInsertQuery::getSampleBlock(

/// The table does not have a column with that name
if (!table_sample.has(current_name))
throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(),
throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(),
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);

if (!allow_materialized && !table_sample_non_materialized.has(current_name))

@ -33,6 +33,7 @@
#include <Interpreters/JoinedTables.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>

#include <Processors/Pipe.h>
#include <Processors/QueryPlan/AddingDelayedSourceStep.h>
@ -1223,6 +1224,7 @@ void InterpreterSelectQuery::executeFetchColumns(
temp_query_info.query = query_ptr;
temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
temp_query_info.sets = query_analyzer->getPreparedSets();

num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
}
if (num_rows)
@ -1329,9 +1331,12 @@ void InterpreterSelectQuery::executeFetchColumns(
if (is_alias)
{
auto column_decl = storage_columns.get(column);
/// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer).
auto cast_column_default = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName());
column_expr = setAlias(cast_column_default->clone(), column);
column_expr = column_default->expression->clone();
// recursive visit for alias to alias
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context);

column_expr = addTypeConversionToAST(std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), *context);
column_expr = setAlias(column_expr, column);
}
else
column_expr = std::make_shared<ASTIdentifier>(column);
@ -1543,7 +1548,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);

query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context);
}

StreamLocalLimits limits;

@ -467,8 +467,11 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context)
guard.second = catalog.getDDLGuard(guard.first.database_name, guard.first.table_name);

ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size()));
for (auto & table : replica_names)
pool.scheduleOrThrowOnError([&]() { tryRestartReplica(table, system_context, false); });
for (auto & replica : replica_names)
{
LOG_TRACE(log, "Restarting replica on {}", replica.getNameForLogs());
pool.scheduleOrThrowOnError([&]() { tryRestartReplica(replica, system_context, false); });
}
pool.wait();
}
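restartReplicas() above fans one task per replica onto a pool whose width is capped by the physical core count, then waits for all of them. The same fan-out-and-join shape with plain std::thread (the sketch captures the loop variable by value, which is the safe default when the closure may outlive the iteration):

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <thread>
    #include <vector>

    int main()
    {
        std::vector<std::string> replicas {"db.t1", "db.t2", "db.t3"};
        size_t width = std::min<size_t>(std::thread::hardware_concurrency(), replicas.size());
        (void)width;   // a real pool would cap concurrency at `width`

        std::vector<std::thread> workers;
        for (const auto & replica : replicas)
            workers.emplace_back([replica] { /* tryRestartReplica(replica, ...) */ });
        for (auto & w : workers)
            w.join();   // pool.wait() equivalent
    }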
@ -78,6 +78,9 @@ void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Da

auto & func_arguments = func.arguments->children;

if (func_arguments.size() != 1)
return;

const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
if (!first_arg_func || first_arg_func->arguments->children.empty())
return;

@ -230,16 +230,8 @@ void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & columns) const
{
for (auto & col : columns)
{
/// Materialize column.
/// Column is not empty if it is constant, but after Join all constants will be materialized.
/// So, we need remove constants from header.
if (col.column)
col.column = nullptr;

if (leftBecomeNullable(col.type))
col.type = makeNullable(col.type);
}

for (const auto & col : columns_added_by_join)
{

@ -18,6 +18,7 @@
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>

#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -427,6 +428,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
}
}

std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
@ -730,6 +732,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
required_source_columns.swap(source_columns);
}

NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
NameSet forbidden_columns;
for (const auto & elem : array_join_result_to_source)
forbidden_columns.insert(elem.first);
return forbidden_columns;
}

TreeRewriterResultPtr TreeRewriter::analyzeSelect(
ASTPtr & query,
@ -793,6 +802,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);

/// rewrite filters for select query, must go after getArrayJoinedColumns
if (settings.optimize_respect_aliases && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
}

result.aggregates = getAggregates(query, *select_query);
result.window_function_asts = getWindowFunctions(query, *select_query);
result.collectUsedColumns(query, true);

@ -70,6 +70,7 @@ struct TreeRewriterResult
void collectSourceColumns(bool add_special);
void collectUsedColumns(const ASTPtr & query, bool is_select);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
NameSet getArrayJoinSourceNameSet() const;
const Scalars & getScalars() const { return scalars; }
};

@ -4,11 +4,20 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTWithAlias.h>

#include <Storages/ColumnsDescription.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>

namespace DB
{

namespace ErrorCodes
{
extern const int THERE_IS_NO_DEFAULT_VALUE;
}

ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
{
auto func = makeASTFunction("cast", ast, std::make_shared<ASTLiteral>(type_name));
@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
return func;
}

ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context)
{
auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, all_columns);
const auto actions = ExpressionAnalyzer(ast, syntax_analyzer_result, context).getActions(true);

for (const auto & action : actions->getActions())
if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);

auto block = actions->getSampleBlock();

auto desc_type = block.getByName(ast->getColumnName()).type;
if (desc_type->getName() != type_name)
return addTypeConversionToAST(std::move(ast), type_name);

return std::move(ast);
}

}
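The new addTypeConversionToAST overload above analyzes the expression first and wraps it in CAST only when the deduced type differs from the declared one. The core decision in isolation, over strings (`add_type_conversion` is a hypothetical stand-in, not ClickHouse's API):

    #include <iostream>
    #include <string>

    // Wrap in CAST only when the deduced type of the expression differs
    // from the declared column type.
    static std::string add_type_conversion(const std::string & expr,
                                           const std::string & deduced_type,
                                           const std::string & declared_type)
    {
        if (deduced_type != declared_type)
            return "CAST(" + expr + ", '" + declared_type + "')";
        return expr;   // same type: no wrapper, keep the AST as-is
    }

    int main()
    {
        std::cout << add_type_conversion("day + 1", "Date", "Date") << '\n';       // day + 1
        std::cout << add_type_conversion("toDate(x)", "Date", "DateTime") << '\n'; // CAST(toDate(x), 'DateTime')
    }

Skipping the redundant CAST keeps rewritten alias expressions usable for index analysis, where an extra function wrapper can defeat key-condition matching.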
@ -6,8 +6,12 @@

namespace DB
{

class Context;
class NamesAndTypesList;
/// It will produce an expression with CAST to get an AST with the required type.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name);

// If the type is already the same, this overload skips the CAST wrapper.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context);

}

src/Interpreters/replaceAliasColumnsInQuery.cpp (new file, 16 lines)
@ -0,0 +1,16 @@
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Storages/ColumnsDescription.h>
#include <Parsers/ASTSelectQuery.h>

namespace DB
{

void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context)
{
ColumnAliasesVisitor::Data aliase_column_data(columns, forbidden_columns, context);
ColumnAliasesVisitor aliase_column_visitor(aliase_column_data);
aliase_column_visitor.visit(ast);
}

}

src/Interpreters/replaceAliasColumnsInQuery.h (new file, 14 lines)
@ -0,0 +1,14 @@
#pragma once

#include <common/types.h>
#include <Core/Names.h>
#include <Parsers/IAST_fwd.h>

namespace DB
{

class ColumnsDescription;
class Context;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context);

}

@ -37,6 +37,7 @@ SRCS(
ClusterProxy/SelectStreamFactory.cpp
ClusterProxy/executeQuery.cpp
CollectJoinOnKeysVisitor.cpp
ColumnAliasesVisitor.cpp
Context.cpp
CrashLog.cpp
CrossToInnerJoinVisitor.cpp
@ -157,6 +158,7 @@ SRCS(
interpretSubquery.cpp
join_common.cpp
loadMetadata.cpp
replaceAliasColumnsInQuery.cpp
processColumnTransformers.cpp
sortBlock.cpp

@ -261,11 +261,13 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserIdentifier id_parser;
ParserKeyword distinct("DISTINCT");
ParserKeyword all("ALL");
ParserExpressionList contents(false);
ParserSelectWithUnionQuery select;
ParserKeyword over("OVER");

bool has_distinct_modifier = false;
bool has_all = false;
bool has_distinct = false;

ASTPtr identifier;
ASTPtr query;
@ -279,10 +281,34 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
++pos;

auto pos_after_bracket = pos;
auto old_expected = expected;

if (all.ignore(pos, expected))
has_all = true;

if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
else
has_distinct = true;

if (!has_all && all.ignore(pos, expected))
has_all = true;

if (has_all && has_distinct)
return false;

if (has_all || has_distinct)
{
/// In cases like f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers.
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_all = false;
has_distinct = false;
}
}

if (!has_distinct && !has_all)
{
auto old_pos = pos;
auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
@ -370,14 +396,37 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
++pos;

/// Parametric aggregate functions cannot have DISTINCT in parameters list.
if (has_distinct_modifier)
if (has_distinct)
return false;

expr_list_params = expr_list_args;
expr_list_args = nullptr;

pos_after_bracket = pos;
old_expected = expected;

if (all.ignore(pos, expected))
has_all = true;

if (distinct.ignore(pos, expected))
has_distinct_modifier = true;
has_distinct = true;

if (!has_all && all.ignore(pos, expected))
has_all = true;

if (has_all && has_distinct)
return false;

if (has_all || has_distinct)
{
/// In cases like f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers.
if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
{
pos = pos_after_bracket;
expected = old_expected;
has_distinct = false;
}
}

if (!contents.parse(pos, expr_list_args, expected))
return false;
@ -391,7 +440,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
tryGetIdentifierNameInto(identifier, function_node->name);

/// func(DISTINCT ...) is equivalent to funcDistinct(...)
if (has_distinct_modifier)
if (has_distinct)
function_node->name += "Distinct";

function_node->arguments = expr_list_args;
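The ALL/DISTINCT handling above is speculative parsing with rollback: the parser remembers the position right after the opening bracket, consumes ALL or DISTINCT, and rewinds when the next token (a comma or a closing bracket) shows the keyword was really an argument, as in f(ALL) or f(DISTINCT, x). The save/rewind mechanics in a standalone toy form:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    int main()
    {
        std::vector<std::string> tokens {"(", "ALL", ")"};
        size_t pos = 1;                  // just past the opening bracket
        size_t pos_after_bracket = pos;  // saved rollback point

        bool has_all = (tokens[pos] == "ALL");
        if (has_all)
            ++pos;                       // speculatively consume the keyword

        // Next token is ')' (or ','): ALL was an argument, not a modifier.
        if (tokens[pos] == ")" || tokens[pos] == ",")
        {
            pos = pos_after_bracket;     // rewind; parse ALL as an identifier
            has_all = false;
        }
        assert(pos == 1 && !has_all);
    }

The real parser also restores the `expected` hint set alongside the position, so error messages don't mention keywords that were rolled back.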
@ -30,6 +30,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
node = select_query;

ParserKeyword s_select("SELECT");
ParserKeyword s_all("ALL");
ParserKeyword s_distinct("DISTINCT");
ParserKeyword s_from("FROM");
ParserKeyword s_prewhere("PREWHERE");
@ -91,14 +92,24 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
}

/// SELECT [DISTINCT] [TOP N [WITH TIES]] expr list
/// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list
{
bool has_all = false;
if (!s_select.ignore(pos, expected))
return false;

if (s_all.ignore(pos, expected))
has_all = true;

if (s_distinct.ignore(pos, expected))
select_query->distinct = true;

if (!has_all && s_all.ignore(pos, expected))
has_all = true;

if (has_all && select_query->distinct)
return false;

if (s_top.ignore(pos, expected))
{
ParserNumber num;
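ParserSelectQuery above accepts SELECT ALL or SELECT DISTINCT in either position but rejects both together. The acceptance rule reduced to a single predicate:

    #include <cassert>

    // Either modifier alone parses; both together fail, mirroring
    // `if (has_all && select_query->distinct) return false;`.
    static bool accepts(bool has_all, bool has_distinct)
    {
        return !(has_all && has_distinct);
    }

    int main()
    {
        assert(accepts(false, false));   // SELECT x
        assert(accepts(true, false));    // SELECT ALL x
        assert(accepts(false, true));    // SELECT DISTINCT x
        assert(!accepts(true, true));    // SELECT ALL DISTINCT x -> parse error
    }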
Some files were not shown because too many files have changed in this diff.