Merge branch 'master' into starts_ends_with_utf8

This commit is contained in:
李扬 2023-08-02 10:29:29 +08:00 committed by GitHub
commit 041af6899d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
481 changed files with 8854 additions and 2935 deletions

View File

@ -3643,7 +3643,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang)
CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports

View File

@ -4541,7 +4541,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang)
CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports

View File

@ -23,7 +23,6 @@
* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
@ -124,6 +123,7 @@
* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete.
#### Build/Testing/Packaging Improvement
* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
# We should not export dynamic symbols, because:
# - The main clickhouse binary does not use dlopen,
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
# should not expose their symbols to ODBC drivers and libraries.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
if (OS_DARWIN)
# The `-all_load` flag forces loading of all symbols from all libraries,

View File

@ -8,6 +8,7 @@
#include <functional>
#include <iosfwd>
#include <base/defines.h>
#include <base/types.h>
#include <base/unaligned.h>
@ -274,6 +275,8 @@ struct CRC32Hash
if (size == 0)
return 0;
chassert(pos);
if (size < 8)
{
return static_cast<unsigned>(hashLessThan8(x.data, x.size));

View File

@ -115,8 +115,15 @@
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
// clang-format off
#include <base/types.h>
namespace DB
{
void abortOnFailedAssertion(const String & description);
}
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while

View File

@ -57,7 +57,7 @@ public:
URI();
/// Creates an empty URI.
explicit URI(const std::string & uri, bool disable_url_encoding = false);
explicit URI(const std::string & uri, bool enable_url_encoding = true);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@ -362,7 +362,7 @@ private:
std::string _query;
std::string _fragment;
bool _disable_url_encoding = false;
bool _enable_url_encoding = true;
};

View File

@ -36,8 +36,8 @@ URI::URI():
}
URI::URI(const std::string& uri, bool decode_and_encode_path):
_port(0), _disable_url_encoding(decode_and_encode_path)
URI::URI(const std::string& uri, bool enable_url_encoding):
_port(0), _enable_url_encoding(enable_url_encoding)
{
parse(uri);
}
@ -108,7 +108,7 @@ URI::URI(const URI& uri):
_path(uri._path),
_query(uri._query),
_fragment(uri._fragment),
_disable_url_encoding(uri._disable_url_encoding)
_enable_url_encoding(uri._enable_url_encoding)
{
}
@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_path(baseURI._path),
_query(baseURI._query),
_fragment(baseURI._fragment),
_disable_url_encoding(baseURI._disable_url_encoding)
_enable_url_encoding(baseURI._enable_url_encoding)
{
resolve(relativeURI);
}
@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
_disable_url_encoding = uri._disable_url_encoding;
_enable_url_encoding = uri._enable_url_encoding;
}
return *this;
}
@ -184,7 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
std::swap(_disable_url_encoding, uri._disable_url_encoding);
std::swap(_enable_url_encoding, uri._enable_url_encoding);
}
@ -687,18 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
void URI::encodePath(std::string & encodedStr) const
{
if (_disable_url_encoding)
encodedStr = _path;
else
if (_enable_url_encoding)
encode(_path, RESERVED_PATH, encodedStr);
else
encodedStr = _path;
}
void URI::decodePath(const std::string & encodedStr)
{
if (_disable_url_encoding)
_path = encodedStr;
else
if (_enable_url_encoding)
decode(encodedStr, _path);
else
_path = encodedStr;
}
bool URI::isWellKnownPort() const

View File

@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
# Also, after we disabled the export of symbols for dynamic linking, we still to keep a static symbol table for good stack traces.
COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM

View File

@ -161,5 +161,9 @@
"docker/test/sqllogic": {
"name": "clickhouse/sqllogic-test",
"dependent": []
},
"docker/test/integration/nginx_dav": {
"name": "clickhouse/nginx-dav",
"dependent": []
}
}

View File

@ -64,7 +64,7 @@ then
ninja $NINJA_FLAGS clickhouse-keeper
ls -la ./programs/
ldd ./programs/clickhouse-keeper
ldd ./programs/clickhouse-keeper ||:
if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.
@ -80,19 +80,9 @@ else
cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
mkdir -p /workdir/cov-analysis
wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1
export PATH=$PATH:/workdir/cov-analysis/bin
cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC"
SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int"
fi
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET
ninja $NINJA_FLAGS $BUILD_TARGET
ls -la ./programs
@ -175,13 +165,6 @@ then
mv "$COMBINED_OUTPUT.tar.zst" /output
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
# Coverity does not understand ZSTD.
tar -cvz -f "coverity-scan.tar.gz" cov-int
mv "coverity-scan.tar.gz" /output
fi
ccache_status
ccache --evict-older-than 1d

View File

@ -253,11 +253,6 @@ def parse_env_variables(
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
# Create combined output archive for performance tests.
if package_type == "coverity":
result.append("COMBINED_OUTPUT=coverity")
result.append('COVERITY_TOKEN="$COVERITY_TOKEN"')
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
if build_type:
@ -356,7 +351,7 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--package-type",
choices=["deb", "binary", "coverity"],
choices=["deb", "binary"],
required=True,
)
parser.add_argument(

View File

@ -11,6 +11,7 @@ RUN apt-get update \
pv \
ripgrep \
zstd \
locales \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)
@ -28,7 +29,10 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
ENV TZ=Europe/Moscow
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV TZ=Europe/Amsterdam
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
CMD sleep 1

View File

@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV COMMIT_SHA=''

View File

@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \

View File

@ -0,0 +1,6 @@
FROM nginx:alpine-slim
COPY default.conf /etc/nginx/conf.d/
RUN mkdir /usr/share/nginx/files/ \
&& chown nginx: /usr/share/nginx/files/ -R

View File

@ -0,0 +1,25 @@
server {
listen 80;
#root /usr/share/nginx/test.com;
index index.html index.htm;
server_name test.com localhost;
location / {
expires max;
root /usr/share/nginx/files;
client_max_body_size 20m;
client_body_temp_path /usr/share/nginx/tmp;
dav_methods PUT; # Allowed methods, only PUT is necessary
create_full_put_path on; # nginx automatically creates nested directories
dav_access user:rw group:r all:r; # access permissions for files
limit_except GET {
allow all;
}
}
error_page 405 =200 $uri;
}

View File

@ -1,16 +1,15 @@
version: '2.3'
services:
meili1:
image: getmeili/meilisearch:v0.27.0
image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700}
meili_secure:
image: getmeili/meilisearch:v0.27.0
image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700}
environment:
MEILI_MASTER_KEY: "password"

View File

@ -9,10 +9,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-1.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
command: --server_id=100
--log-bin='mysql-bin-1.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/error.log
@ -21,4 +21,4 @@ services:
volumes:
- type: ${MYSQL_LOGS_FS:-tmpfs}
source: ${MYSQL_LOGS:-}
target: /mysql/
target: /mysql/

View File

@ -9,9 +9,9 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL8_PORT:-3306}
command: --server_id=100 --log-bin='mysql-bin-1.log'
--default_authentication_plugin='mysql_native_password'
--default-time-zone='+3:00' --gtid-mode="ON"
command: --server_id=100 --log-bin='mysql-bin-1.log'
--default_authentication_plugin='mysql_native_password'
--default-time-zone='+3:00' --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/error.log
@ -20,4 +20,4 @@ services:
volumes:
- type: ${MYSQL8_LOGS_FS:-tmpfs}
source: ${MYSQL8_LOGS:-}
target: /mysql/
target: /mysql/

View File

@ -9,10 +9,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-2.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
command: --server_id=100
--log-bin='mysql-bin-2.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/2_error.log
@ -31,10 +31,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-3.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
command: --server_id=100
--log-bin='mysql-bin-3.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/3_error.log
@ -53,10 +53,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
command: --server_id=100
--log-bin='mysql-bin-4.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
command: --server_id=100
--log-bin='mysql-bin-4.log'
--default-time-zone='+3:00'
--gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/4_error.log
@ -65,4 +65,4 @@ services:
volumes:
- type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs}
source: ${MYSQL_CLUSTER_LOGS:-}
target: /mysql/
target: /mysql/

View File

@ -5,7 +5,7 @@ services:
# Files will be put into /usr/share/nginx/files.
nginx:
image: kssenii/nginx-test:1.1
image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest}
restart: always
ports:
- 80:80

View File

@ -12,9 +12,9 @@ services:
timeout: 5s
retries: 5
networks:
default:
aliases:
- postgre-sql.local
default:
aliases:
- postgre-sql.local
environment:
POSTGRES_HOST_AUTH_METHOD: "trust"
POSTGRES_PASSWORD: mysecretpassword

View File

@ -12,7 +12,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@ -37,7 +37,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@ -61,7 +61,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind

View File

@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge
export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest}
export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest}
export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"

View File

@ -11,7 +11,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \

View File

@ -52,7 +52,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1

View File

@ -233,4 +233,10 @@ rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed
if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv
then
sed -i 's/failure/success/' /test_output/check_status.tsv
fi
collect_core_dumps

View File

@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -231,4 +231,10 @@ rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed
if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv
then
sed -i 's/failure/success/' /test_output/check_status.tsv
fi
collect_core_dumps

View File

@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default.
- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default.

View File

@ -1723,6 +1723,34 @@ You can select data from a ClickHouse table and save them into some file in the
``` bash
$ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'"
```
### Using autogenerated schema {#using-autogenerated-capn-proto-schema}
If you don't have an external CapnProto schema for your data, you can still output/input data in CapnProto format using autogenerated schema.
For example:
```sql
SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1
```
In this case ClickHouse will autogenerate CapnProto schema according to the table structure using function [structureToCapnProtoSchema](../sql-reference/functions/other-functions.md#structure_to_capn_proto_schema) and will use this schema to serialize data in CapnProto format.
You can also read CapnProto file with autogenerated schema (in this case the file must be created using the same schema):
```bash
$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_capn_proto_use_autogenerated_schema=1 FORMAT CapnProto"
```
The setting [format_capn_proto_use_autogenerated_schema](../operations/settings/settings-formats.md#format_capn_proto_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
```sql
SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.capnp'
```
In this case autogenerated CapnProto schema will be saved in file `path/to/schema/schema.capnp`.
## Prometheus {#prometheus}
Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format).
@ -1861,6 +1889,33 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
### Using autogenerated schema {#using-autogenerated-protobuf-schema}
If you don't have an external Protobuf schema for your data, you can still output/input data in Protobuf format using autogenerated schema.
For example:
```sql
SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1
```
In this case ClickHouse will autogenerate Protobuf schema according to the table structure using function [structureToProtobufSchema](../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) and will use this schema to serialize data in Protobuf format.
You can also read Protobuf file with autogenerated schema (in this case the file must be created using the same schema):
```bash
$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_protobuf_use_autogenerated_schema=1 FORMAT Protobuf"
```
The setting [format_protobuf_use_autogenerated_schema](../operations/settings/settings-formats.md#format_protobuf_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
```sql
SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.proto'
```
In this case autogenerated Protobuf schema will be saved in file `path/to/schema/schema.capnp`.
## ProtobufSingle {#protobufsingle}
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.

View File

@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
### Usage examples

View File

@ -67,7 +67,7 @@ Substitutions can also be performed from ZooKeeper. To do this, specify the attr
## Encrypting Configuration {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encryption_codec` with the name of the encryption codec as value to the element to encrypt.
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt.
Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process.
@ -75,19 +75,22 @@ Example:
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
<password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```
To get the encrypted value `encrypt_decrypt` example application may be used.
To encrypt a value, you can use the (example) program `encrypt_decrypt`:
Example:
@ -138,12 +141,17 @@ Here you can see default config written in YAML: [config.yaml.example](https://g
There are some differences between YAML and XML formats in terms of ClickHouse configurations. Here are some tips for writing a configuration in YAML format.
You should use a Scalar node to write a key-value pair:
An XML tag with a text value is represented by a YAML key-value pair
``` yaml
key: value
```
To create a node, containing other nodes you should use a Map:
Corresponding XML:
``` xml
<key>value</value>
```
A nested XML node is represented by a YAML map:
``` yaml
map_key:
key1: val1
@ -151,7 +159,16 @@ map_key:
key3: val3
```
To create a list of values or nodes assigned to one tag you should use a Sequence:
Corresponding XML:
``` xml
<map_key>
<key1>val1</key1>
<key2>val2</key2>
<key3>val3</key3>
</map_key>
```
To create the same XML tag multiple times, use a YAML sequence:
``` yaml
seq_key:
- val1
@ -162,8 +179,22 @@ seq_key:
key3: val5
```
If you want to write an attribute for a Sequence or Map node, you should use a @ prefix before the attribute key. Note, that @ is reserved by YAML standard, so you should also to wrap it into double quotes:
Corresponding XML:
```xml
<seq_key>val1</seq_key>
<seq_key>val2</seq_key>
<seq_key>
<key1>val3</key1>
</seq_key>
<seq_key>
<map>
<key2>val4</key2>
<key3>val5</key3>
</map>
</seq_key>
```
To provide an XML attribute, you can use an attribute key with a `@` prefix. Note that `@` is reserved by YAML standard, so must be wrapped in double quotes:
``` yaml
map:
"@attr1": value1
@ -171,16 +202,14 @@ map:
key: 123
```
From that Map we will get these XML nodes:
Corresponding XML:
``` xml
<map attr1="value1" attr2="value2">
<key>123</key>
</map>
```
You can also set attributes for Sequence:
It is also possible to use attributes in YAML sequence:
``` yaml
seq:
- "@attr1": value1
@ -189,13 +218,25 @@ seq:
- abc
```
So, we can get YAML config equal to this XML one:
Corresponding XML:
``` xml
<seq attr1="value1" attr2="value2">123</seq>
<seq attr1="value1" attr2="value2">abc</seq>
```
The aforementioned syntax does not allow to express XML text nodes with XML attributes as YAML. This special case can be achieved using an
`#text` attribute key:
```yaml
map_key:
"@attr1": value1
"#text": value2
```
Corresponding XML:
```xml
<map_key attr1="value1">value2</map>
```
## Implementation Details {#implementation-details}
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.

View File

@ -0,0 +1,26 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
# Profile Guided Optimization
Profile-Guided Optimization (PGO) is a compiler optimization technique where a program is optimized based on the runtime profile.
According to the tests, PGO helps with achieving better performance for ClickHouse. According to the tests, we see improvements up to 15% in QPS on the ClickBench test suite. The more detailed results are available [here](https://pastebin.com/xbue3HMU). The performance benefits depend on your typical workload - you can get better or worse results.
More information about PGO in ClickHouse you can read in the corresponding GitHub [issue](https://github.com/ClickHouse/ClickHouse/issues/44567).
## How to build ClickHouse with PGO?
There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse.
1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`.
2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments.
3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step.
A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).
If you are going to collect a sample workload directly from a production environment, we recommend trying to use Sampling PGO.

View File

@ -61,11 +61,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i
may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table
[system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run with setting "use_query_cache =
true". Other queries do not affect the cache miss counter. Field `query_log_usage` in system table
[system.query_log](system-tables/query_log.md) shows for each ran query whether the query result was written into or read from the query
cache.
`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and
"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run
with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_log_usage` in system table
[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the
query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table
[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains.
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
changed (see below) but doing so is not recommended for security reasons.

View File

@ -512,7 +512,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc
<type>cache</type>
<disk>local_disk</disk>
<path>/tiny_local_cache/</path>
<max_size>10M</max_size>
<max_size_rows>10M</max_size_rows>
<max_file_segment_size>1M</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
@ -1592,6 +1592,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</metric_log>
</clickhouse>
```
@ -1695,6 +1699,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1706,6 +1718,10 @@ Use the following parameters to configure logging:
<table>part_log</table>
<partition_by>toMonday(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</part_log>
```
@ -1773,6 +1789,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1786,6 +1810,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
<table>query_log</table>
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_log>
```
@ -1831,6 +1859,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size_rows, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1844,6 +1880,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
<table>query_thread_log</table>
<partition_by>toMonday(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_thread_log>
```
@ -1861,6 +1901,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1874,6 +1922,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
<table>query_views_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_views_log>
```
@ -1890,6 +1942,14 @@ Parameters:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1901,13 +1961,16 @@ Parameters:
<database>system</database>
<table>text_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</clickhouse>
```
## trace_log {#server_configuration_parameters-trace_log}
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
@ -1920,6 +1983,12 @@ Parameters:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@ -1931,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</trace_log>
```
@ -1945,9 +2018,18 @@ Parameters:
- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
**Example**
```xml
<clickhouse>
<asynchronous_insert_log>
@ -1955,11 +2037,53 @@ Parameters:
<table>asynchronous_insert_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<partition_by>toYYYYMM(event_date)</partition_by>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
<!-- <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine> -->
</asynchronous_insert_log>
</clickhouse>
```
## crash_log {#server_configuration_parameters-crash_log}
Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation.
Parameters:
- `database` — Database for storing a table.
- `table` — Table name.
- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
- `max_size_rows` Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk.
Default: 1048576.
- `reserved_size_rows` Pre-allocated memory size in lines for the logs.
Default: 8192.
- `buffer_size_rows_flush_threshold` Lines amount threshold, reaching it launches flushing logs to the disk in background.
Default: `max_size_rows / 2`.
- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash.
Default: false.
- `storage_policy` Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
The default server configuration file `config.xml` contains the following settings section:
``` xml
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1024</max_size_rows>
<reserved_size_rows>1024</reserved_size_rows>
<buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</crash_log>
```
## query_masking_rules {#query-masking-rules}
Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs,
@ -2164,6 +2288,8 @@ This section contains the following parameters:
- `session_timeout_ms` — Maximum timeout for the client session in milliseconds.
- `operation_timeout_ms` — Maximum timeout for one operation in milliseconds.
- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional.
- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours.
- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours.
- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection.
* random - randomly selects one of ZooKeeper nodes.

View File

@ -327,3 +327,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all
Zero means unlimited.
Default value: 0.
## max_sessions_for_user {#max-sessions-per-user}
Maximum number of simultaneous sessions per authenticated user to the ClickHouse server.
Example:
``` xml
<profiles>
<single_session_profile>
<max_sessions_for_user>1</max_sessions_for_user>
</single_session_profile>
<two_sessions_profile>
<max_sessions_for_user>2</max_sessions_for_user>
</two_sessions_profile>
<unlimited_sessions_profile>
<max_sessions_for_user>0</max_sessions_for_user>
</unlimited_sessions_profile>
</profiles>
<users>
<!-- User Alice can connect to a ClickHouse server no more than once at a time. -->
<Alice>
<profile>single_session_user</profile>
</Alice>
<!-- User Bob can use 2 simultaneous sessions. -->
<Bob>
<profile>two_sessions_profile</profile>
</Bob>
<!-- User Charles can use arbitrarily many of simultaneous sessions. -->
<Charles>
<profile>unlimited_sessions_profile</profile>
</Charles>
</users>
```
Default value: 0 (Infinite count of simultaneous sessions).

View File

@ -321,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar
This parameter is useful when you are using formats that require a schema definition, such as [Capn Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
## output_format_schema {#output-format-schema}
The path to the file where the automatically generated schema will be saved in [Capn Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats.
## output_format_enable_streaming {#output_format_enable_streaming}
Enable streaming in output formats that support it.
@ -1164,7 +1168,7 @@ Enabled by default.
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
Default value: `none`.
Default value: `lz4_frame`.
## ORC format settings {#orc-format-settings}
@ -1330,6 +1334,11 @@ When serializing Nullable columns with Google wrappers, serialize default values
Disabled by default.
### format_protobuf_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set.
The schema is generated from ClickHouse table structure using function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema)
## Avro format settings {#avro-format-settings}
### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
@ -1626,6 +1635,11 @@ Possible values:
Default value: `'by_values'`.
### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set.
The schema is generated from ClickHouse table structure using function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema)
## MySQLDump format settings {#musqldump-format-settings}
### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name)

View File

@ -39,7 +39,7 @@ Example:
<max_threads>8</max_threads>
</default>
<!-- Settings for quries from the user interface -->
<!-- Settings for queries from the user interface -->
<web>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
@ -67,6 +67,8 @@ Example:
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<max_sessions_for_user>4</max_sessions_for_user>
<readonly>1</readonly>
</web>
</profiles>

View File

@ -3468,11 +3468,11 @@ Possible values:
Default value: `0`.
## disable_url_encoding {#disable_url_encoding}
## enable_url_encoding {#enable_url_encoding}
Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
Disabled by default.
Enabled by default.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}

View File

@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
<!--- Unlike with system.events and system.metrics, the asynchronous metrics are not gathered in a simple list in a source code file - they
are mixed with logic in src/Interpreters/ServerAsynchronousMetrics.cpp.
Listing them here explicitly for reader convenience. --->
## Metric descriptions
@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b
Number of threads in the server of the PostgreSQL compatibility protocol.
### QueryCacheBytes
Total size of the query cache cache in bytes.
### QueryCacheEntries
Total number of entries in the query cache.
### ReplicasMaxAbsoluteDelay
Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data.

View File

@ -10,6 +10,9 @@ Columns:
- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.
You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp).
**Example**

View File

@ -47,6 +47,10 @@ An example:
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_log>
</clickhouse>
```

View File

@ -10,8 +10,9 @@ Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`.
The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
**Example**

View File

@ -48,7 +48,7 @@ Columns:
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0.
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.

View File

@ -51,3 +51,7 @@ keeper foo bar
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
- `get_stat [path]` -- Returns the node's stat (default `.`)
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stable_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)

View File

@ -140,8 +140,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)

View File

@ -2552,3 +2552,187 @@ Result:
This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.
## structureToCapnProtoSchema {#structure_to_capn_proto_schema}
Converts ClickHouse table structure to CapnProto schema.
**Syntax**
``` sql
structureToCapnProtoSchema(structure)
```
**Arguments**
- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
- `root_struct_name` — Name for root struct in CapnProto schema. Default value - `Message`;
**Returned value**
- CapnProto schema
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
```
Result:
``` text
@0xf96402dd754d0eb7;
struct Message
{
column1 @0 : Data;
column2 @1 : UInt32;
column3 @2 : List(Data);
}
```
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
```
Result:
``` text
@0xd1c8320fecad2b7f;
struct Message
{
struct Column1
{
union
{
value @0 : Data;
null @1 : Void;
}
}
column1 @0 : Column1;
struct Column2
{
element1 @0 : UInt32;
element2 @1 : List(Data);
}
column2 @1 : Column2;
struct Column3
{
struct Entry
{
key @0 : Data;
value @1 : Data;
}
entries @0 : List(Entry);
}
column3 @2 : Column3;
}
```
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
```
Result:
``` text
@0x96ab2d4ab133c6e1;
struct Root
{
column1 @0 : Data;
column2 @1 : UInt32;
}
```
## structureToProtobufSchema {#structure_to_protobuf_schema}
Converts ClickHouse table structure to Protobuf schema.
**Syntax**
``` sql
structureToProtobufSchema(structure)
```
**Arguments**
- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
- `root_message_name` — Name for root message in Protobuf schema. Default value - `Message`;
**Returned value**
- Protobuf schema
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Message
{
bytes column1 = 1;
uint32 column2 = 2;
repeated bytes column3 = 3;
}
```
Query:
``` sql
SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Message
{
bytes column1 = 1;
message Column2
{
uint32 element1 = 1;
repeated bytes element2 = 2;
}
Column2 column2 = 2;
map<string, bytes> column3 = 3;
}
```
Query:
``` sql
SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Root
{
bytes column1 = 1;
uint32 column2 = 2;
}
```

View File

@ -56,7 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default.
**See Also**

View File

@ -87,7 +87,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
## Шифрование {#encryption}
Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encryption_codec` с именем кодека шифрования как значение к элементу, который надо зашифровать.
Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать.
В отличии от аттрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выподняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе.
@ -95,15 +95,18 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
<password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```

View File

@ -1058,6 +1058,10 @@ ClickHouse использует потоки из глобального пул
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</metric_log>
</clickhouse>
```
@ -1155,12 +1159,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
**Пример**
``` xml
@ -1169,6 +1180,10 @@ ClickHouse использует потоки из глобального пул
<table>part_log</table>
<partition_by>toMonday(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</part_log>
```
@ -1218,11 +1233,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
- `database` — имя базы данных;
- `table` — имя таблицы, куда будет записываться лог;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@ -1234,6 +1257,10 @@ ClickHouse использует потоки из глобального пул
<table>query_log</table>
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_log>
```
@ -1245,11 +1272,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
- `database` — имя базы данных;
- `table` — имя таблицы, куда будет записываться лог;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@ -1261,6 +1296,10 @@ ClickHouse использует потоки из глобального пул
<table>query_thread_log</table>
<partition_by>toMonday(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_thread_log>
```
@ -1272,11 +1311,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
- `database` имя базы данных.
- `table` имя системной таблицы, где будут логироваться запросы.
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`.
- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@ -1288,6 +1335,10 @@ ClickHouse использует потоки из глобального пул
<table>query_views_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_views_log>
```
@ -1297,12 +1348,20 @@ ClickHouse использует потоки из глобального пул
Параметры:
- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице.
- `database` — имя базы данных для хранения таблицы.
- `table` — имя таблицы, куда будут записываться текстовые сообщения.
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
**Пример**
```xml
@ -1312,6 +1371,10 @@ ClickHouse использует потоки из глобального пул
<database>system</database>
<table>text_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
@ -1323,13 +1386,21 @@ ClickHouse использует потоки из глобального пул
Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
Parameters:
Параметры:
- `database` — Database for storing a table.
- `table` — Table name.
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
По умолчанию файл настроек сервера `config.xml` содержит следующие настройки:
@ -1339,9 +1410,84 @@ Parameters:
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
</trace_log>
```
## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
Настройки для asynchronous_insert_log Система для логирования ассинхронных вставок.
Параметры:
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1048576.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 8192.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: false.
**Пример**
```xml
<clickhouse>
<asynchronous_insert_log>
<database>system</database>
<table>asynchronous_insert_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<partition_by>toYYYYMM(event_date)</partition_by>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<!-- <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine> -->
</asynchronous_insert_log>
</clickhouse>
```
## crash_log {#server_configuration_parameters-crash_log}
Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md).
Параметры:
- `database` — имя базы данных;
- `table` — имя таблицы;
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
- `max_size_rows` максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
Значение по умолчанию: 1024.
- `reserved_size_rows` преаллоцированный размер в строках для буфера с логами.
Значение по умолчанию: 1024.
- `buffer_size_bytes_flush_threshold` количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
Значение по умолчанию: `max_size / 2`.
- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
Значение по умолчанию: true.
**Пример**
``` xml
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1024</max_size_rows>
<reserved_size_rows>1024</reserved_size_rows>
<buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
<flush_on_crash>true</flush_on_crash>
</crash_log>
```
## query_masking_rules {#query-masking-rules}
Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере,

View File

@ -314,3 +314,40 @@ FORMAT Null;
При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом:
> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by max_partitions_per_insert_block setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).»
## max_sessions_for_user {#max-sessions-per-user}
Максимальное количество одновременных сессий на одного аутентифицированного пользователя.
Пример:
``` xml
<profiles>
<single_session_profile>
<max_sessions_for_user>1</max_sessions_for_user>
</single_session_profile>
<two_sessions_profile>
<max_sessions_for_user>2</max_sessions_for_user>
</two_sessions_profile>
<unlimited_sessions_profile>
<max_sessions_for_user>0</max_sessions_for_user>
</unlimited_sessions_profile>
</profiles>
<users>
<!-- Пользователь Alice может одновременно подключаться не
более одного раза к серверу ClickHouse. -->
<Alice>
<profile>single_session_profile</profile>
</Alice>
<!-- Пользователь Bob может использовать 2 одновременных сессии. -->
<Bob>
<profile>two_sessions_profile</profile>
</Bob>
<!-- Пользователь Charles может иметь любое количество одновременных сессий. -->
<Charles>
<profile>unlimited_sessions_profile</profile>
</Charles>
</users>
```
Значение по умолчанию: 0 (неограниченное количество сессий).

View File

@ -39,7 +39,7 @@ SET profile = 'web'
<max_threads>8</max_threads>
</default>
<!-- Settings for quries from the user interface -->
<!-- Settings for queries from the user interface -->
<web>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
@ -67,6 +67,7 @@ SET profile = 'web'
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<max_sessions_for_user>4</max_sessions_for_user>
<readonly>1</readonly>
</web>
</profiles>

View File

@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы"
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_log>
</clickhouse>
```

View File

@ -1,5 +1,6 @@
#include "Commands.h"
#include <queue>
#include "KeeperClient.h"
@ -24,8 +25,18 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
else
path = client->cwd;
for (const auto & child : client->zookeeper->getChildren(path))
std::cout << child << " ";
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
bool need_space = false;
for (const auto & child : children)
{
if (std::exchange(need_space, true))
std::cout << " ";
std::cout << child;
}
std::cout << "\n";
}
@ -130,6 +141,173 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
return true;
node->args.push_back(std::move(arg));
return true;
}
void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
Coordination::Stat stat;
String path;
if (!query->args.empty())
path = client->getAbsolutePath(query->args[0].safeGet<String>());
else
path = client->cwd;
client->zookeeper->get(path, &stat);
std::cout << "cZxid = " << stat.czxid << "\n";
std::cout << "mZxid = " << stat.mzxid << "\n";
std::cout << "pZxid = " << stat.pzxid << "\n";
std::cout << "ctime = " << stat.ctime << "\n";
std::cout << "mtime = " << stat.mtime << "\n";
std::cout << "version = " << stat.version << "\n";
std::cout << "cversion = " << stat.cversion << "\n";
std::cout << "aversion = " << stat.aversion << "\n";
std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n";
std::cout << "dataLength = " << stat.dataLength << "\n";
std::cout << "numChildren = " << stat.numChildren << "\n";
}
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
ASTPtr threshold;
if (!ParserUnsignedInteger{}.parse(pos, threshold, expected))
return false;
node->args.push_back(threshold->as<ASTLiteral &>().value);
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
return true;
}
void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto threshold = query->args[0].safeGet<UInt64>();
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
Coordination::Stat stat;
client->zookeeper->get(path, &stat);
if (stat.numChildren >= static_cast<Int32>(threshold))
{
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
return;
}
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
for (const auto & child : children)
{
auto next_query = *query;
next_query.args[1] = DB::Field(path / child);
execute(&next_query, client);
}
}
bool DeleteStableBackups::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;
}
void DeleteStableBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const
{
client->askConfirmation(
"You are going to delete all inactive backups in /clickhouse/backups.",
[client]
{
fs::path backup_root = "/clickhouse/backups";
auto backups = client->zookeeper->getChildren(backup_root);
std::sort(backups.begin(), backups.end());
for (const auto & child : backups)
{
auto backup_path = backup_root / child;
std::cout << "Found backup " << backup_path << ", checking if it's active\n";
String stage_path = backup_path / "stage";
auto stages = client->zookeeper->getChildren(stage_path);
bool is_active = false;
for (const auto & stage : stages)
{
if (startsWith(stage, "alive"))
{
is_active = true;
break;
}
}
if (is_active)
{
std::cout << "Backup " << backup_path << " is active, not going to delete\n";
continue;
}
std::cout << "Backup " << backup_path << " is not active, deleting it\n";
client->zookeeper->removeRecursive(backup_path);
}
});
}
bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
ASTPtr count;
if (ParserUnsignedInteger{}.parse(pos, count, expected))
node->args.push_back(count->as<ASTLiteral &>().value);
else
node->args.push_back(UInt64(10));
return true;
}
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
auto n = query->args[1].safeGet<UInt64>();
std::vector<std::tuple<Int32, String>> result;
std::queue<fs::path> queue;
queue.push(path);
while (!queue.empty())
{
auto next_path = queue.front();
queue.pop();
auto children = client->zookeeper->getChildren(next_path);
std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; });
auto response = client->zookeeper->get(children);
for (size_t i = 0; i < response.size(); ++i)
{
result.emplace_back(response[i].stat.numChildren, children[i]);
queue.push(children[i]);
}
}
std::sort(result.begin(), result.end(), std::greater());
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
}
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
@ -170,7 +348,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery
void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
{
for (const auto & pair : KeeperClient::commands)
std::cout << pair.second->getHelpMessage() << "\n";
std::cout << pair.second->generateHelpString() << "\n";
}
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const

View File

@ -21,6 +21,12 @@ public:
virtual String getName() const = 0;
virtual ~IKeeperClientCommand() = default;
String generateHelpString() const
{
return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName()));
}
};
using Command = std::shared_ptr<IKeeperClientCommand>;
@ -34,7 +40,7 @@ class LSCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }
String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; }
};
class CDCommand : public IKeeperClientCommand
@ -45,7 +51,7 @@ class CDCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
};
class SetCommand : public IKeeperClientCommand
@ -58,7 +64,7 @@ class SetCommand : public IKeeperClientCommand
String getHelpMessage() const override
{
return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
}
};
@ -70,7 +76,7 @@ class CreateCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "create <path> <value> -- Creates new node"; }
String getHelpMessage() const override { return "{} <path> <value> -- Creates new node"; }
};
class GetCommand : public IKeeperClientCommand
@ -81,9 +87,63 @@ class GetCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; }
};
class FindSuperNodes : public IKeeperClientCommand
{
String getName() const override { return "find_super_nodes"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} <threshold> [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)";
}
};
class DeleteStableBackups : public IKeeperClientCommand
{
String getName() const override { return "delete_stable_backups"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} -- Deletes ClickHouse nodes used for backups that are now inactive";
}
};
class FindBigFamily : public IKeeperClientCommand
{
String getName() const override { return "find_big_family"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)";
}
};
class RMCommand : public IKeeperClientCommand
{
String getName() const override { return "rm"; }
@ -92,7 +152,7 @@ class RMCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "remove <path> -- Remove the node"; }
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
};
class RMRCommand : public IKeeperClientCommand
@ -103,7 +163,7 @@ class RMRCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};
class HelpCommand : public IKeeperClientCommand
@ -114,7 +174,7 @@ class HelpCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "help -- Prints this message"; }
String getHelpMessage() const override { return "{} -- Prints this message"; }
};
class FourLetterWordCommand : public IKeeperClientCommand
@ -125,7 +185,7 @@ class FourLetterWordCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }
String getHelpMessage() const override { return "{} <command> -- Executes four-letter-word command"; }
};
}

View File

@ -177,6 +177,10 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<SetCommand>(),
std::make_shared<CreateCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStableBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<HelpCommand>(),

View File

@ -58,6 +58,7 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
String command_name(pos->begin, pos->end);
std::transform(command_name.begin(), command_name.end(), command_name.begin(), [](unsigned char c) { return std::tolower(c); });
Command command;
auto iter = KeeperClient::commands.find(command_name);

View File

@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
)
if (OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
target_link_libraries(clickhouse-library-bridge PRIVATE

View File

@ -266,6 +266,10 @@ void LocalServer::tryInitPath()
global_context->setUserFilesPath(""); // user's files are everywhere
std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
global_context->setUserScriptsPath(user_scripts_path);
fs::create_directories(user_scripts_path);
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
@ -490,6 +494,17 @@ try
applyCmdSettings(global_context);
/// try to load user defined executable functions, throw on error and die
try
{
global_context->loadOrReloadUserDefinedExecutableFunctions(config());
}
catch (...)
{
tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions.");
throw;
}
if (is_interactive)
{
clearTerminal();
@ -569,7 +584,9 @@ void LocalServer::processConfig()
}
print_stack_trace = config().getBool("stacktrace", false);
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false);
const std::string clickhouse_dialect{"clickhouse"};
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
auto logging = (config().has("logger.console")
|| config().has("logger.level")

View File

@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
validateODBCConnectionString.cpp
)
if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-odbc-bridge PRIVATE

View File

@ -1035,6 +1035,11 @@ try
/// Initialize merge tree metadata cache
if (config().has("merge_tree_metadata_cache"))
{
global_context->addWarningMessage("The setting 'merge_tree_metadata_cache' is enabled."
" But the feature of 'metadata cache in RocksDB' is experimental and is not ready for production."
" The usage of this feature can lead to data corruption and loss. The setting should be disabled in production."
" See the corresponding report at https://github.com/ClickHouse/ClickHouse/issues/51182");
fs::create_directories(path / "rocksdb/");
size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
@ -1686,17 +1691,26 @@ try
global_context->initializeTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
if (total_memory_profiler_step)
if (server_settings.total_memory_profiler_step)
{
total_memory_tracker.setProfilerStep(total_memory_profiler_step);
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
}
double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
if (total_memory_tracker_sample_probability > 0.0)
if (server_settings.total_memory_tracker_sample_probability > 0.0)
{
total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
}
if (server_settings.total_memory_profiler_sample_min_allocation_size)
{
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
}
if (server_settings.total_memory_profiler_sample_max_allocation_size)
{
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
}
#endif
@ -2031,27 +2045,26 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
std::string prefix = "protocols." + protocol + ".";
std::string port_name = prefix + "port";
std::string description {"<undefined> protocol"};
if (config.has(prefix + "description"))
description = config.getString(prefix + "description");
if (!config.has(prefix + "port"))
continue;
if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name))
continue;
std::vector<std::string> hosts;
if (config.has("protocols." + protocol + ".host"))
hosts.push_back(config.getString("protocols." + protocol + ".host"));
if (config.has(prefix + "host"))
hosts.push_back(config.getString(prefix + "host"));
else
hosts = listen_hosts;
for (const auto & host : hosts)
{
std::string conf_name = "protocols." + protocol;
std::string prefix = conf_name + ".";
if (!config.has(prefix + "port"))
continue;
std::string description {"<undefined> protocol"};
if (config.has(prefix + "description"))
description = config.getString(prefix + "description");
std::string port_name = prefix + "port";
bool is_secure = false;
auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure);

View File

@ -1026,6 +1026,14 @@
<!-- Interval of flushing data. -->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<!-- Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. -->
<max_size_rows>1048576</max_size_rows>
<!-- Pre-allocated size in lines for the logs. -->
<reserved_size_rows>8192</reserved_size_rows>
<!-- Lines amount threshold, reaching it launches flushing logs to the disk in background. -->
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<!-- Indication whether logs should be dumped to the disk in case of a crash -->
<flush_on_crash>false</flush_on_crash>
<!-- example of using a different storage policy for a system table -->
<!-- storage_policy>local_ssd</storage_policy -->
@ -1039,6 +1047,11 @@
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<!-- Indication whether logs should be dumped to the disk in case of a crash -->
<flush_on_crash>false</flush_on_crash>
</trace_log>
<!-- Query thread log. Has information about all threads participated in query execution.
@ -1048,6 +1061,10 @@
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_thread_log>
<!-- Query views log. Has information about all dependent views associated with a query.
@ -1066,6 +1083,10 @@
<table>part_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</part_log>
<!-- Uncomment to write text log into table.
@ -1075,6 +1096,10 @@
<database>system</database>
<table>text_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
<level></level>
</text_log>
-->
@ -1084,7 +1109,11 @@
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
<flush_on_crash>false</flush_on_crash>
</metric_log>
<!--
@ -1095,6 +1124,10 @@
<database>system</database>
<table>asynchronous_metric_log</table>
<flush_interval_milliseconds>7000</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</asynchronous_metric_log>
<!--
@ -1119,6 +1152,10 @@
<database>system</database>
<table>opentelemetry_span_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</opentelemetry_span_log>
@ -1130,6 +1167,10 @@
<partition_by />
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
<max_size_rows>1024</max_size_rows>
<reserved_size_rows>1024</reserved_size_rows>
<buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
<flush_on_crash>true</flush_on_crash>
</crash_log>
<!-- Session log. Stores user log in (successful or not) and log out events.
@ -1142,6 +1183,10 @@
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</session_log> -->
<!-- Profiling on Processors level. -->
@ -1151,6 +1196,10 @@
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</processors_profile_log>
<!-- Log of asynchronous inserts. It allows to check status
@ -1161,6 +1210,10 @@
<table>asynchronous_insert_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
<partition_by>event_date</partition_by>
<ttl>event_date + INTERVAL 3 DAY</ttl>
</asynchronous_insert_log>
@ -1418,12 +1471,6 @@
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
</query_cache>
<!-- Uncomment if enable merge tree metadata cache -->
<!--merge_tree_metadata_cache>
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache-->
<!-- This allows to disable exposing addresses in stack traces for security reasons.
Please be aware that it does not improve security much, but makes debugging much harder.
The addresses that are small offsets from zero will be displayed nevertheless to show nullptr dereferences.

View File

@ -328,9 +328,6 @@ void ContextAccess::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> &
enabled_row_policies = access_control->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles);
enabled_quota = access_control->getEnabledQuota(
*params.user_id, user_name, roles_info->enabled_roles, params.address, params.forwarded_address, params.quota_key);
enabled_settings = access_control->getEnabledSettings(
*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles);
@ -416,19 +413,32 @@ RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, co
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
if (enabled_quota)
return enabled_quota;
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
if (!enabled_quota)
{
if (roles_info)
{
enabled_quota = access_control->getEnabledQuota(*params.user_id,
user_name,
roles_info->enabled_roles,
params.address,
params.forwarded_address,
params.quota_key);
}
else
{
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
}
}
return enabled_quota;
}
std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
{
std::lock_guard lock{mutex};
if (enabled_quota)
return enabled_quota->getUsage();
return {};
return getQuota()->getUsage();
}

View File

@ -1,4 +1,5 @@
#include <string_view>
#include <unordered_map>
#include <Access/SettingsConstraints.h>
#include <Access/resolveSetting.h>
#include <Access/AccessControl.h>
@ -6,6 +7,7 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Common/FieldVisitorToString.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/SettingSource.h>
#include <IO/WriteHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/range/algorithm_ext/erase.hpp>
@ -20,6 +22,39 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
}
namespace
{
struct SettingSourceRestrictions
{
constexpr SettingSourceRestrictions() { allowed_sources.set(); }
constexpr SettingSourceRestrictions(std::initializer_list<SettingSource> allowed_sources_)
{
for (auto allowed_source : allowed_sources_)
setSourceAllowed(allowed_source, true);
}
constexpr bool isSourceAllowed(SettingSource source) { return allowed_sources[source]; }
constexpr void setSourceAllowed(SettingSource source, bool allowed) { allowed_sources[source] = allowed; }
std::bitset<SettingSource::COUNT> allowed_sources;
};
const std::unordered_map<std::string_view, SettingSourceRestrictions> SETTINGS_SOURCE_RESTRICTIONS = {
{"max_sessions_for_user", {SettingSource::PROFILE}},
};
SettingSourceRestrictions getSettingSourceRestrictions(std::string_view name)
{
auto settingConstraintIter = SETTINGS_SOURCE_RESTRICTIONS.find(name);
if (settingConstraintIter != SETTINGS_SOURCE_RESTRICTIONS.end())
return settingConstraintIter->second;
else
return SettingSourceRestrictions(); // allows everything
}
}
SettingsConstraints::SettingsConstraints(const AccessControl & access_control_) : access_control(&access_control_)
{
}
@ -98,7 +133,7 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const
{
for (const auto & element : profile_elements)
{
@ -108,19 +143,19 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
if (element.value)
{
SettingChange value(element.setting_name, *element.value);
check(current_settings, value);
check(current_settings, value, source);
}
if (element.min_value)
{
SettingChange value(element.setting_name, *element.min_value);
check(current_settings, value);
check(current_settings, value, source);
}
if (element.max_value)
{
SettingChange value(element.setting_name, *element.max_value);
check(current_settings, value);
check(current_settings, value, source);
}
SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE;
@ -142,24 +177,24 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
}
}
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change, SettingSource source) const
{
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source);
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes) const
void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const
{
for (const auto & change : changes)
check(current_settings, change);
check(current_settings, change, source);
}
void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes) const
void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const
{
boost::range::remove_erase_if(
changes,
[&](SettingChange & change) -> bool
{
return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);
return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source);
});
}
@ -174,13 +209,13 @@ void SettingsConstraints::check(const MergeTreeSettings & current_settings, cons
check(current_settings, change);
}
void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes) const
void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const
{
boost::range::remove_erase_if(
changes,
[&](SettingChange & change) -> bool
{
return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION);
return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION, source);
});
}
@ -215,7 +250,10 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel
return true;
}
bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const
bool SettingsConstraints::checkImpl(const Settings & current_settings,
SettingChange & change,
ReactionOnViolation reaction,
SettingSource source) const
{
std::string_view setting_name = Settings::Traits::resolveName(change.name);
@ -247,7 +285,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION))
return false;
return getChecker(current_settings, setting_name).check(change, new_value, reaction);
return getChecker(current_settings, setting_name).check(change, new_value, reaction, source);
}
bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const
@ -255,10 +293,13 @@ bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings,
Field new_value;
if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION))
return false;
return getMergeTreeChecker(change.name).check(change, new_value, reaction);
return getMergeTreeChecker(change.name).check(change, new_value, reaction, SettingSource::QUERY);
}
bool SettingsConstraints::Checker::check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const
bool SettingsConstraints::Checker::check(SettingChange & change,
const Field & new_value,
ReactionOnViolation reaction,
SettingSource source) const
{
if (!explain.empty())
{
@ -326,6 +367,14 @@ bool SettingsConstraints::Checker::check(SettingChange & change, const Field & n
change.value = max_value;
}
if (!getSettingSourceRestrictions(setting_name).isSourceAllowed(source))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception(ErrorCodes::READONLY, "Setting {} is not allowed to be set by {}", setting_name, toString(source));
else
return false;
}
return true;
}

View File

@ -2,6 +2,7 @@
#include <Access/SettingsProfileElement.h>
#include <Common/SettingsChanges.h>
#include <Common/SettingSource.h>
#include <unordered_map>
namespace Poco::Util
@ -73,17 +74,18 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
void check(const Settings & current_settings, SettingsChanges & changes) const;
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const SettingChange & change, SettingSource source) const;
void check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
/// Checks whether `change` violates these constraints and throws an exception if so. (setting short name is expected inside `changes`)
void check(const MergeTreeSettings & current_settings, const SettingChange & change) const;
void check(const MergeTreeSettings & current_settings, const SettingsChanges & changes) const;
/// Checks whether `change` violates these and clamps the `change` if so.
void clamp(const Settings & current_settings, SettingsChanges & changes) const;
void clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
friend bool operator ==(const SettingsConstraints & left, const SettingsConstraints & right);
friend bool operator !=(const SettingsConstraints & left, const SettingsConstraints & right) { return !(left == right); }
@ -133,7 +135,10 @@ private:
{}
// Perform checking
bool check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const;
bool check(SettingChange & change,
const Field & new_value,
ReactionOnViolation reaction,
SettingSource source) const;
};
struct StringHash
@ -145,7 +150,11 @@ private:
}
};
bool checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
bool checkImpl(const Settings & current_settings,
SettingChange & change,
ReactionOnViolation reaction,
SettingSource source) const;
bool checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
Checker getChecker(const Settings & current_settings, std::string_view setting_name) const;

View File

@ -7,6 +7,7 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/UnionNode.h>
#include <Interpreters/Context.h>
@ -90,26 +91,25 @@ private:
template <typename Derived>
using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor<Derived, true /*const_visitor*/>;
/** Same as InDepthQueryTreeVisitor and additionally keeps track of current scope context.
/** Same as InDepthQueryTreeVisitor (but has a different interface) and additionally keeps track of current scope context.
* This can be useful if your visitor has special logic that depends on current scope context.
*
* To specify behavior of the visitor you can implement following methods in derived class:
* 1. needChildVisit This methods allows to skip subtree.
* 2. enterImpl This method is called before children are processed.
* 3. leaveImpl This method is called after children are processed.
*/
template <typename Derived, bool const_visitor = false>
class InDepthQueryTreeVisitorWithContext
{
public:
using VisitQueryTreeNodeType = std::conditional_t<const_visitor, const QueryTreeNodePtr, QueryTreeNodePtr>;
using VisitQueryTreeNodeType = QueryTreeNodePtr;
explicit InDepthQueryTreeVisitorWithContext(ContextPtr context, size_t initial_subquery_depth = 0)
: current_context(std::move(context))
, subquery_depth(initial_subquery_depth)
{}
/// Return true if visitor should traverse tree top to bottom, false otherwise
bool shouldTraverseTopToBottom() const
{
return true;
}
/// Return true if visitor should visit child, false otherwise
bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]])
{
@ -146,18 +146,16 @@ public:
++subquery_depth;
bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom();
if (!traverse_top_to_bottom)
visitChildren(query_tree_node);
getDerived().enterImpl(query_tree_node);
getDerived().visitImpl(query_tree_node);
if (traverse_top_to_bottom)
visitChildren(query_tree_node);
visitChildren(query_tree_node);
getDerived().leaveImpl(query_tree_node);
}
void enterImpl(VisitQueryTreeNodeType & node [[maybe_unused]])
{}
void leaveImpl(VisitQueryTreeNodeType & node [[maybe_unused]])
{}
private:
@ -171,17 +169,31 @@ private:
return *static_cast<Derived *>(this);
}
bool shouldSkipSubtree(
VisitQueryTreeNodeType & parent,
VisitQueryTreeNodeType & child,
size_t subtree_index)
{
bool need_visit_child = getDerived().needChildVisit(parent, child);
if (!need_visit_child)
return true;
if (auto * table_function_node = parent->as<TableFunctionNode>())
{
const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes();
return std::find(unresolved_indexes.begin(), unresolved_indexes.end(), subtree_index) != unresolved_indexes.end();
}
return false;
}
void visitChildren(VisitQueryTreeNodeType & expression)
{
size_t index = 0;
for (auto & child : expression->getChildren())
{
if (!child)
continue;
bool need_visit_child = getDerived().needChildVisit(expression, child);
if (need_visit_child)
if (child && !shouldSkipSubtree(expression, child, index))
visit(child);
++index;
}
}
@ -189,50 +201,4 @@ private:
size_t subquery_depth = 0;
};
template <typename Derived>
using ConstInDepthQueryTreeVisitorWithContext = InDepthQueryTreeVisitorWithContext<Derived, true /*const_visitor*/>;
/** Visitor that use another visitor to visit node only if condition for visiting node is true.
* For example, your visitor need to visit only query tree nodes or union nodes.
*
* Condition interface:
* struct Condition
* {
* bool operator()(VisitQueryTreeNodeType & node)
* {
* return shouldNestedVisitorVisitNode(node);
* }
* }
*/
template <typename Visitor, typename Condition, bool const_visitor = false>
class InDepthQueryTreeConditionalVisitor : public InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>
{
public:
using Base = InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>;
using VisitQueryTreeNodeType = typename Base::VisitQueryTreeNodeType;
explicit InDepthQueryTreeConditionalVisitor(Visitor & visitor_, Condition & condition_)
: visitor(visitor_)
, condition(condition_)
{
}
bool shouldTraverseTopToBottom() const
{
return visitor.shouldTraverseTopToBottom();
}
void visitImpl(VisitQueryTreeNodeType & query_tree_node)
{
if (condition(query_tree_node))
visitor.visit(query_tree_node);
}
Visitor & visitor;
Condition & condition;
};
template <typename Visitor, typename Condition>
using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor<Visitor, Condition, true /*const_visitor*/>;
}

View File

@ -51,13 +51,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
using Base::Base;
/// Traverse tree bottom to top
static bool shouldTraverseTopToBottom()
{
return false;
}
void visitImpl(QueryTreeNodePtr & node)
void leaveImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
return;

View File

@ -22,7 +22,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_array_exists_to_has)
return;

View File

@ -20,7 +20,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().final)
return;

View File

@ -50,7 +50,7 @@ public:
&& settings.max_hyperscan_regexp_total_length == 0;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "or")

View File

@ -688,7 +688,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ConvertQueryToCNFVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * query_node = node->as<QueryNode>();
if (!query_node)

View File

@ -22,7 +22,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().count_distinct_optimization)
return;

View File

@ -193,7 +193,7 @@ public:
return true;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!isEnabled())
return;

View File

@ -29,7 +29,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
if (!getSettings().optimize_functions_to_subcolumns)
return;

View File

@ -37,7 +37,7 @@ public:
, names_to_collect(names_to_collect_)
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_syntax_fuse_functions)
return;

View File

@ -46,7 +46,7 @@ public:
{
}
void visitImpl(const QueryTreeNodePtr & node)
void enterImpl(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "grouping")

View File

@ -23,7 +23,7 @@ public:
, multi_if_function_ptr(std::move(multi_if_function_ptr_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_if_chain_to_multiif)
return;

View File

@ -113,7 +113,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ConvertStringsToEnumVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_if_transform_strings_to_enum)
return;

View File

@ -19,7 +19,7 @@ public:
: Base(std::move(context))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();

View File

@ -21,7 +21,7 @@ public:
, if_function_ptr(std::move(if_function_ptr_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_multiif_to_if)
return;

View File

@ -20,7 +20,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<NormalizeCountVariantsVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_normalize_count_variants)
return;

View File

@ -0,0 +1,221 @@
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
#include <Functions/FunctionFactory.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
class OptimizeDateOrDateTimeConverterWithPreimageVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeDateOrDateTimeConverterWithPreimageVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<OptimizeDateOrDateTimeConverterWithPreimageVisitor>;
explicit OptimizeDateOrDateTimeConverterWithPreimageVisitor(ContextPtr context)
: Base(std::move(context))
{}
static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*child*/)
{
const static std::unordered_set<String> relations = {
"equals",
"notEquals",
"less",
"greater",
"lessOrEquals",
"greaterOrEquals",
};
if (const auto * function = node->as<FunctionNode>())
{
return !relations.contains(function->getFunctionName());
}
return true;
}
void enterImpl(QueryTreeNodePtr & node) const
{
const static std::unordered_map<String, String> swap_relations = {
{"equals", "equals"},
{"notEquals", "notEquals"},
{"less", "greater"},
{"greater", "less"},
{"lessOrEquals", "greaterOrEquals"},
{"greaterOrEquals", "lessOrEquals"},
};
const auto * function = node->as<FunctionNode>();
if (!function || !swap_relations.contains(function->getFunctionName())) return;
if (function->getArguments().getNodes().size() != 2) return;
size_t func_id = function->getArguments().getNodes().size();
for (size_t i = 0; i < function->getArguments().getNodes().size(); i++)
{
if (const auto * func = function->getArguments().getNodes()[i]->as<FunctionNode>())
{
func_id = i;
}
}
if (func_id == function->getArguments().getNodes().size()) return;
size_t literal_id = 1 - func_id;
const auto * literal = function->getArguments().getNodes()[literal_id]->as<ConstantNode>();
if (!literal || literal->getValue().getType() != Field::Types::UInt64) return;
String comparator = literal_id > func_id ? function->getFunctionName(): swap_relations.at(function->getFunctionName());
const auto * func_node = function->getArguments().getNodes()[func_id]->as<FunctionNode>();
/// Currently we only handle single-argument functions.
if (!func_node || func_node->getArguments().getNodes().size() != 1) return;
const auto * column_id = func_node->getArguments().getNodes()[0]->as<ColumnNode>();
if (!column_id) return;
const auto * column_type = column_id->getColumnType().get();
if (!isDateOrDate32(column_type) && !isDateTime(column_type) && !isDateTime64(column_type)) return;
const auto & converter = FunctionFactory::instance().tryGet(func_node->getFunctionName(), getContext());
if (!converter) return;
ColumnsWithTypeAndName args;
args.emplace_back(column_id->getColumnType(), "tmp");
auto converter_base = converter->build(args);
if (!converter_base || !converter_base->hasInformationAboutPreimage()) return;
auto preimage_range = converter_base->getPreimage(*(column_id->getColumnType()), literal->getValue());
if (!preimage_range) return;
const auto new_node = generateOptimizedDateFilter(comparator, *column_id, *preimage_range);
if (!new_node) return;
node = new_node;
}
private:
QueryTreeNodePtr generateOptimizedDateFilter(const String & comparator, const ColumnNode & column_node, const std::pair<Field, Field>& range) const
{
const DateLUTImpl & date_lut = DateLUT::instance("UTC");
String start_date_or_date_time;
String end_date_or_date_time;
if (isDateOrDate32(column_node.getColumnType().get()))
{
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
}
else if (isDateTime(column_node.getColumnType().get()) || isDateTime64(column_node.getColumnType().get()))
{
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
}
else [[unlikely]] return {};
if (comparator == "equals")
{
const auto lhs = std::make_shared<FunctionNode>("greaterOrEquals");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("less");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("and");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "notEquals")
{
const auto lhs = std::make_shared<FunctionNode>("less");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("greaterOrEquals");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("or");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "greater")
{
const auto new_date_filter = std::make_shared<FunctionNode>("greaterOrEquals");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "lessOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>("less");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "less" || comparator == "greaterOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>(comparator);
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else [[unlikely]]
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}",
comparator);
}
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
}
void OptimizeDateOrDateTimeConverterWithPreimagePass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
OptimizeDateOrDateTimeConverterWithPreimageVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,24 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Replace predicate having Date/DateTime converters with their preimages to improve performance.
* Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01'
* Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'.
* The similar optimization also applies to other converters.
*/
class OptimizeDateOrDateTimeConverterWithPreimagePass final : public IQueryTreePass
{
public:
String getName() override { return "OptimizeDateOrDateTimeConverterWithPreimagePass"; }
String getDescription() override { return "Replace predicate having Date/DateTime converters with their preimages"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -26,7 +26,7 @@ public:
return !child->as<FunctionNode>();
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_group_by_function_keys)
return;

View File

@ -28,7 +28,7 @@ public:
return true;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_redundant_functions_in_order_by)
return;

View File

@ -116,6 +116,7 @@ namespace ErrorCodes
extern const int UNKNOWN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -4896,6 +4897,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
lambda_expression_untyped->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
if (!parameters.empty())
{
throw Exception(
ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage());
}
auto lambda_expression_clone = lambda_expression_untyped->clone();
IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/);
@ -5012,9 +5019,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters);
bool is_executable_udf = true;
if (!function)
{
function = FunctionFactory::instance().tryGet(function_name, scope.context);
is_executable_udf = false;
}
if (!function)
{
@ -5065,6 +5076,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
return result_projection_names;
}
/// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory.
if (!parameters.empty() && !is_executable_udf)
{
throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name);
}
/** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function.
* Then each lambda arguments are initialized with columns, where column source is lambda.
* This information is important for later steps of query processing.
@ -6434,7 +6451,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_function_ptr->parseArguments(table_function_ast, scope_context);
auto table_function_storage = scope_context->getQueryContext()->executeTableFunction(table_function_ast, table_function_ptr);
table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context);
table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context, std::move(skip_analysis_arguments_indexes));
}
/// Resolve array join node in scope
@ -6477,55 +6494,69 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
auto result_type = array_join_expression->getResultType();
bool is_array_type = isArray(result_type);
bool is_map_type = isMap(result_type);
if (!is_array_type && !is_map_type)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_expression->formatASTForErrorMessage(),
result_type->getName(),
scope.scope_node->formatASTForErrorMessage());
if (is_map_type)
result_type = assert_cast<const DataTypeMap &>(*result_type).getNestedType();
result_type = assert_cast<const DataTypeArray &>(*result_type).getNestedType();
String array_join_column_name;
if (!array_join_expression_alias.empty())
auto process_array_join_expression = [&](QueryTreeNodePtr & expression)
{
array_join_column_name = array_join_expression_alias;
}
else if (auto * array_join_expression_inner_column = array_join_expression->as<ColumnNode>())
auto result_type = expression->getResultType();
bool is_array_type = isArray(result_type);
bool is_map_type = isMap(result_type);
if (!is_array_type && !is_map_type)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
expression->formatASTForErrorMessage(),
result_type->getName(),
scope.scope_node->formatASTForErrorMessage());
if (is_map_type)
result_type = assert_cast<const DataTypeMap &>(*result_type).getNestedType();
result_type = assert_cast<const DataTypeArray &>(*result_type).getNestedType();
String array_join_column_name;
if (!array_join_expression_alias.empty())
{
array_join_column_name = array_join_expression_alias;
}
else if (auto * array_join_expression_inner_column = array_join_expression->as<ColumnNode>())
{
array_join_column_name = array_join_expression_inner_column->getColumnName();
}
else if (!identifier_full_name.empty())
{
array_join_column_name = identifier_full_name;
}
else
{
array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter);
++array_join_expressions_counter;
}
if (array_join_column_names.contains(array_join_column_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"ARRAY JOIN {} multiple columns with name {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_column_name,
scope.scope_node->formatASTForErrorMessage());
array_join_column_names.emplace(array_join_column_name);
NameAndTypePair array_join_column(array_join_column_name, result_type);
auto array_join_column_node = std::make_shared<ColumnNode>(std::move(array_join_column), expression, array_join_node);
array_join_column_node->setAlias(array_join_expression_alias);
array_join_column_expressions.push_back(std::move(array_join_column_node));
};
// Support ARRAY JOIN COLUMNS(...). COLUMNS transformer is resolved to list of columns.
if (auto * columns_list = array_join_expression->as<ListNode>())
{
array_join_column_name = array_join_expression_inner_column->getColumnName();
}
else if (!identifier_full_name.empty())
{
array_join_column_name = identifier_full_name;
for (auto & array_join_subexpression : columns_list->getNodes())
process_array_join_expression(array_join_subexpression);
}
else
{
array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter);
++array_join_expressions_counter;
process_array_join_expression(array_join_expression);
}
if (array_join_column_names.contains(array_join_column_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"ARRAY JOIN {} multiple columns with name {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_column_name,
scope.scope_node->formatASTForErrorMessage());
array_join_column_names.emplace(array_join_column_name);
NameAndTypePair array_join_column(array_join_column_name, result_type);
auto array_join_column_node = std::make_shared<ColumnNode>(std::move(array_join_column), array_join_expression, array_join_node);
array_join_column_node->setAlias(array_join_expression_alias);
array_join_column_expressions.push_back(std::move(array_join_column_node));
}
/** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because
@ -6537,11 +6568,9 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
* And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression
* with type after ARRAY JOIN.
*/
for (size_t i = 0; i < array_join_nodes_size; ++i)
array_join_nodes = std::move(array_join_column_expressions);
for (auto & array_join_column_expression : array_join_nodes)
{
auto & array_join_column_expression = array_join_nodes[i];
array_join_column_expression = std::move(array_join_column_expressions[i]);
auto it = scope.alias_name_to_expression_node.find(array_join_column_expression->getAlias());
if (it != scope.alias_name_to_expression_node.end())
{

View File

@ -26,7 +26,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_aggregate_function_with_if)
return;

View File

@ -24,7 +24,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ShardNumColumnToFunctionVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)

View File

@ -26,7 +26,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<SumIfToCountIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_sum_if_to_count_if)
return;

View File

@ -31,7 +31,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<UniqInjectiveFunctionsEliminationVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_injective_functions_inside_uniq)
return;

View File

@ -42,6 +42,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
namespace DB
{
@ -278,6 +279,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}
}

View File

@ -27,12 +27,13 @@ TableFunctionNode::TableFunctionNode(String table_function_name_)
children[arguments_child_index] = std::make_shared<ListNode>();
}
void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context)
void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_)
{
table_function = std::move(table_function_value);
storage = std::move(storage_value);
storage_id = storage->getStorageID();
storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
unresolved_arguments_indexes = std::move(unresolved_arguments_indexes_);
}
const StorageID & TableFunctionNode::getStorageID() const
@ -132,6 +133,7 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
result->storage_snapshot = storage_snapshot;
result->table_expression_modifiers = table_expression_modifiers;
result->settings_changes = settings_changes;
result->unresolved_arguments_indexes = unresolved_arguments_indexes;
return result;
}

View File

@ -98,7 +98,7 @@ public:
}
/// Resolve table function with table function, storage and context
void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context);
void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_);
/// Get storage id, throws exception if function node is not resolved
const StorageID & getStorageID() const;
@ -106,6 +106,11 @@ public:
/// Get storage snapshot, throws exception if function node is not resolved
const StorageSnapshotPtr & getStorageSnapshot() const;
const std::vector<size_t> & getUnresolvedArgumentIndexes() const
{
return unresolved_arguments_indexes;
}
/// Return true if table function node has table expression modifiers, false otherwise
bool hasTableExpressionModifiers() const
{
@ -164,6 +169,7 @@ private:
StoragePtr storage;
StorageID storage_id;
StorageSnapshotPtr storage_snapshot;
std::vector<size_t> unresolved_arguments_indexes;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;

View File

@ -30,6 +30,7 @@ public:
String compression_method;
int compression_level = -1;
String password;
String s3_storage_class;
ContextPtr context;
bool is_internal_backup = false;
std::shared_ptr<IBackupCoordination> backup_coordination;

View File

@ -88,7 +88,7 @@ namespace
request.SetMaxKeys(1);
auto outcome = client.ListObjects(request);
if (!outcome.IsSuccess())
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
return outcome.GetResult().GetContents();
}
@ -178,7 +178,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
@ -188,6 +188,7 @@ BackupWriterS3::BackupWriterS3(
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
request_settings.setStorageClassName(storage_class_name);
}
void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
@ -271,7 +272,7 @@ void BackupWriterS3::removeFile(const String & file_name)
request.SetKey(fs::path(s3_uri.key) / file_name);
auto outcome = client->DeleteObject(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
void BackupWriterS3::removeFiles(const Strings & file_names)
@ -329,7 +330,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names)
auto outcome = client->DeleteObjects(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
}

View File

@ -38,7 +38,7 @@ private:
class BackupWriterS3 : public BackupWriterDefault
{
public:
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_);
~BackupWriterS3() override;
bool fileExists(const String & file_name) override;

View File

@ -21,6 +21,7 @@ namespace ErrorCodes
M(String, id) \
M(String, compression_method) \
M(String, password) \
M(String, s3_storage_class) \
M(Bool, structure_only) \
M(Bool, async) \
M(Bool, decrypt_files_from_encrypted_disks) \

View File

@ -25,6 +25,9 @@ struct BackupSettings
/// Password used to encrypt the backup.
String password;
/// S3 storage class.
String s3_storage_class = "";
/// If this is set to true then only create queries will be written to backup,
/// without the data of tables.
bool structure_only = false;

Some files were not shown because too many files have changed in this diff Show More