Merge branch 'master' into feature/mergetree-checksum-big-endian-support

This commit is contained in:
ltrk2 2023-08-02 11:36:43 -04:00 committed by GitHub
commit 27a2d4d1c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
474 changed files with 10957 additions and 1726 deletions

View File

@ -3643,7 +3643,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang)
CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports

View File

@ -4541,7 +4541,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang)
CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports

View File

@ -23,7 +23,6 @@
* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
@ -124,6 +123,7 @@
* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete.
#### Build/Testing/Packaging Improvement
* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
# We should not export dynamic symbols, because:
# - The main clickhouse binary does not use dlopen,
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
# should not expose their symbols to ODBC drivers and libraries.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
if (OS_DARWIN)
# The `-all_load` flag forces loading of all symbols from all libraries,

View File

@ -8,6 +8,7 @@
#include <functional>
#include <iosfwd>
#include <base/defines.h>
#include <base/types.h>
#include <base/unaligned.h>
@ -274,6 +275,8 @@ struct CRC32Hash
if (size == 0)
return 0;
chassert(pos);
if (size < 8)
{
return static_cast<unsigned>(hashLessThan8(x.data, x.size));

View File

@ -115,8 +115,15 @@
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
// clang-format off
#include <base/types.h>
namespace DB
{
void abortOnFailedAssertion(const String & description);
}
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while

View File

@ -57,7 +57,7 @@ public:
URI();
/// Creates an empty URI.
explicit URI(const std::string & uri, bool disable_url_encoding = false);
explicit URI(const std::string & uri, bool enable_url_encoding = true);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@ -362,7 +362,7 @@ private:
std::string _query;
std::string _fragment;
bool _disable_url_encoding = false;
bool _enable_url_encoding = true;
};

View File

@ -36,8 +36,8 @@ URI::URI():
}
URI::URI(const std::string& uri, bool decode_and_encode_path):
_port(0), _disable_url_encoding(decode_and_encode_path)
URI::URI(const std::string& uri, bool enable_url_encoding):
_port(0), _enable_url_encoding(enable_url_encoding)
{
parse(uri);
}
@ -108,7 +108,7 @@ URI::URI(const URI& uri):
_path(uri._path),
_query(uri._query),
_fragment(uri._fragment),
_disable_url_encoding(uri._disable_url_encoding)
_enable_url_encoding(uri._enable_url_encoding)
{
}
@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_path(baseURI._path),
_query(baseURI._query),
_fragment(baseURI._fragment),
_disable_url_encoding(baseURI._disable_url_encoding)
_enable_url_encoding(baseURI._enable_url_encoding)
{
resolve(relativeURI);
}
@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
_disable_url_encoding = uri._disable_url_encoding;
_enable_url_encoding = uri._enable_url_encoding;
}
return *this;
}
@ -184,7 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
std::swap(_disable_url_encoding, uri._disable_url_encoding);
std::swap(_enable_url_encoding, uri._enable_url_encoding);
}
@ -687,18 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
void URI::encodePath(std::string & encodedStr) const
{
if (_disable_url_encoding)
encodedStr = _path;
else
if (_enable_url_encoding)
encode(_path, RESERVED_PATH, encodedStr);
else
encodedStr = _path;
}
void URI::decodePath(const std::string & encodedStr)
{
if (_disable_url_encoding)
_path = encodedStr;
else
if (_enable_url_encoding)
decode(encodedStr, _path);
else
_path = encodedStr;
}
bool URI::isWellKnownPort() const

View File

@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
# Also, after we disabled the export of symbols for dynamic linking, we still to keep a static symbol table for good stack traces.
COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM

View File

@ -17,7 +17,8 @@
#ifndef METROHASH_PLATFORM_H
#define METROHASH_PLATFORM_H
#include <stdint.h>
#include <bit>
#include <cstdint>
#include <cstring>
// rotate right idiom recognized by most compilers
@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr)
// so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction
// when hardware supports it, so using memcpy() is efficient too.
memcpy(&result, ptr, sizeof(result));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
result = std::byteswap(result);
#endif
return result;
}
@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr)
{
uint32_t result;
memcpy(&result, ptr, sizeof(result));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
result = std::byteswap(result);
#endif
return result;
}
@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr)
{
uint16_t result;
memcpy(&result, ptr, sizeof(result));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
result = std::byteswap(result);
#endif
return result;
}

View File

@ -161,5 +161,9 @@
"docker/test/sqllogic": {
"name": "clickhouse/sqllogic-test",
"dependent": []
},
"docker/test/integration/nginx_dav": {
"name": "clickhouse/nginx-dav",
"dependent": []
}
}

View File

@ -6,7 +6,7 @@ Usage:
Build deb package with `clang-14` in `debug` mode:
```
$ mkdir deb/test_output
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build
$ ls -l deb/test_output
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb

View File

@ -64,7 +64,7 @@ then
ninja $NINJA_FLAGS clickhouse-keeper
ls -la ./programs/
ldd ./programs/clickhouse-keeper
ldd ./programs/clickhouse-keeper ||:
if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.
@ -80,19 +80,9 @@ else
cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
mkdir -p /workdir/cov-analysis
wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1
export PATH=$PATH:/workdir/cov-analysis/bin
cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC"
SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int"
fi
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET
ninja $NINJA_FLAGS $BUILD_TARGET
ls -la ./programs
@ -175,13 +165,6 @@ then
mv "$COMBINED_OUTPUT.tar.zst" /output
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
# Coverity does not understand ZSTD.
tar -cvz -f "coverity-scan.tar.gz" cov-int
mv "coverity-scan.tar.gz" /output
fi
ccache_status
ccache --evict-older-than 1d

View File

@ -112,12 +112,12 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool:
return build_type == "" and package_type == "deb" and sanitizer == ""
def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool:
return not debug_build and package_type == "deb" and sanitizer == ""
def parse_env_variables(
build_type: str,
debug_build: bool,
compiler: str,
sanitizer: str,
package_type: str,
@ -240,7 +240,7 @@ def parse_env_variables(
build_target = (
f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge"
)
if is_release_build(build_type, package_type, sanitizer):
if is_release_build(debug_build, package_type, sanitizer):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@ -253,15 +253,10 @@ def parse_env_variables(
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
# Create combined output archive for performance tests.
if package_type == "coverity":
result.append("COMBINED_OUTPUT=coverity")
result.append('COVERITY_TOKEN="$COVERITY_TOKEN"')
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
if build_type:
result.append(f"BUILD_TYPE={build_type.capitalize()}")
if debug_build:
result.append("BUILD_TYPE=Debug")
else:
result.append("BUILD_TYPE=None")
@ -356,7 +351,7 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--package-type",
choices=["deb", "binary", "coverity"],
choices=["deb", "binary"],
required=True,
)
parser.add_argument(
@ -366,7 +361,7 @@ def parse_args() -> argparse.Namespace:
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", type=dir_name, required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--debug-build", action="store_true")
parser.add_argument(
"--compiler",
@ -472,7 +467,7 @@ def main():
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
args.build_type,
args.debug_build,
args.compiler,
args.sanitizer,
args.package_type,

View File

@ -11,6 +11,7 @@ RUN apt-get update \
pv \
ripgrep \
zstd \
locales \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)
@ -28,7 +29,10 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
ENV TZ=Europe/Moscow
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV TZ=Europe/Amsterdam
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
CMD sleep 1

View File

@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV COMMIT_SHA=''

View File

@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \

View File

@ -0,0 +1,6 @@
FROM nginx:alpine-slim
COPY default.conf /etc/nginx/conf.d/
RUN mkdir /usr/share/nginx/files/ \
&& chown nginx: /usr/share/nginx/files/ -R

View File

@ -0,0 +1,25 @@
server {
listen 80;
#root /usr/share/nginx/test.com;
index index.html index.htm;
server_name test.com localhost;
location / {
expires max;
root /usr/share/nginx/files;
client_max_body_size 20m;
client_body_temp_path /usr/share/nginx/tmp;
dav_methods PUT; # Allowed methods, only PUT is necessary
create_full_put_path on; # nginx automatically creates nested directories
dav_access user:rw group:r all:r; # access permissions for files
limit_except GET {
allow all;
}
}
error_page 405 =200 $uri;
}

View File

@ -95,6 +95,7 @@ RUN python3 -m pip install --no-cache-dir \
pytest-timeout \
pytest-xdist \
pytz \
pyyaml==5.3.1 \
redis \
requests-kerberos \
tzlocal==2.1 \

View File

@ -13,4 +13,3 @@ services:
- ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700}
environment:
MEILI_MASTER_KEY: "password"

View File

@ -5,7 +5,7 @@ services:
# Files will be put into /usr/share/nginx/files.
nginx:
image: kssenii/nginx-test:1.1
image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest}
restart: always
ports:
- 80:80

View File

@ -12,9 +12,9 @@ services:
timeout: 5s
retries: 5
networks:
default:
aliases:
- postgre-sql.local
default:
aliases:
- postgre-sql.local
environment:
POSTGRES_HOST_AUTH_METHOD: "trust"
POSTGRES_PASSWORD: mysecretpassword

View File

@ -12,7 +12,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@ -37,7 +37,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@ -61,7 +61,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- type: bind
- type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind

View File

@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge
export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest}
export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest}
export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"

View File

@ -11,7 +11,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \

View File

@ -52,7 +52,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1

View File

@ -233,4 +233,10 @@ rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed
if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv
then
sed -i 's/failure/success/' /test_output/check_status.tsv
fi
collect_core_dumps

View File

@ -18,10 +18,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -231,4 +231,10 @@ rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed
if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv
then
sed -i 's/failure/success/' /test_output/check_status.tsv
fi
collect_core_dumps

View File

@ -14,6 +14,20 @@ Supported platforms:
- PowerPC 64 LE (experimental)
- RISC-V 64 (experimental)
## Building in docker
We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage:
```bash
# define a directory for the output artifacts
output_dir="build_results"
# a simplest build
./docker/packager/packager --package-type=binary --output-dir "$output_dir"
# build debian packages
./docker/packager/packager --package-type=deb --output-dir "$output_dir"
# by default, debian packages use thin LTO, so we can override it to speed up the build
CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results"
```
## Building on Ubuntu
The following tutorial is based on Ubuntu Linux.

View File

@ -35,7 +35,7 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log — it checks its local metadata with the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
## Usage Example {#usage-example}

View File

@ -60,6 +60,7 @@ Engines in the family:
- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
- [S3Queue](../../engines/table-engines/integrations/s3queue.md)
### Special Engines {#special-engines}

View File

@ -0,0 +1,224 @@
---
slug: /en/engines/table-engines/integrations/s3queue
sidebar_position: 7
sidebar_label: S3Queue
---
# S3Queue Table Engine
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import. This engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features.
## Create Table {#creating-a-table}
``` sql
CREATE TABLE s3_queue_engine_table (name String, value UInt32)
ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
[SETTINGS]
[mode = 'unordered',]
[after_processing = 'keep',]
[keeper_path = '',]
[s3queue_loading_retries = 0,]
[s3queue_polling_min_timeout_ms = 1000,]
[s3queue_polling_max_timeout_ms = 10000,]
[s3queue_polling_backoff_ms = 0,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_tracked_file_ttl_sec = 0,]
[s3queue_polling_size = 50,]
```
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
**Example**
```sql
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'ordred';
```
Using named collections:
``` xml
<clickhouse>
<named_collections>
<s3queue_conf>
<url>'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*</url>
<access_key_id>test<access_key_id>
<secret_access_key>test</secret_access_key>
</s3queue_conf>
</named_collections>
</clickhouse>
```
```sql
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip')
SETTINGS
mode = 'ordred';
```
## Settings {#s3queue-settings}
### mode {#mode}
Possible values:
- unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper.
- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper.
Default value: `unordered`.
### after_processing {#after_processing}
Delete or keep file after successful processing.
Possible values:
- keep.
- delete.
Default value: `keep`.
### keeper_path {#keeper_path}
The path in ZooKeeper can be specified as a table engine setting or default path can be formed from the global configuration-provided path and table UUID.
Possible values:
- String.
Default value: `/`.
### s3queue_loading_retries {#s3queue_loading_retries}
Retry file loading up to specified number of times. By default, there are no retries.
Possible values:
- Positive integer.
Default value: `0`.
### s3queue_polling_min_timeout_ms {#s3queue_polling_min_timeout_ms}
Minimal timeout before next polling (in milliseconds).
Possible values:
- Positive integer.
Default value: `1000`.
### s3queue_polling_max_timeout_ms {#s3queue_polling_max_timeout_ms}
Maximum timeout before next polling (in milliseconds).
Possible values:
- Positive integer.
Default value: `10000`.
### s3queue_polling_backoff_ms {#s3queue_polling_backoff_ms}
Polling backoff (in milliseconds).
Possible values:
- Positive integer.
Default value: `0`.
### s3queue_tracked_files_limit {#s3queue_tracked_files_limit}
Allows to limit the number of Zookeeper nodes if the 'unordered' mode is used, does nothing for 'ordered' mode.
If limit reached the oldest processed files will be deleted from ZooKeeper node and processed again.
Possible values:
- Positive integer.
Default value: `1000`.
### s3queue_tracked_file_ttl_sec {#s3queue_tracked_file_ttl_sec}
Maximum number of seconds to store processed files in ZooKeeper node (store forever by default) for 'unordered' mode, does nothing for 'ordered' mode.
After the specified number of seconds, the file will be re-imported.
Possible values:
- Positive integer.
Default value: `0`.
### s3queue_polling_size {#s3queue_polling_size}
Maximum files to fetch from S3 with SELECT or in background task.
Engine takes files for processing from S3 in batches.
We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path.
Possible values:
- Positive integer.
Default value: `50`.
## S3-related Settings {#s3-settings}
Engine supports all s3 related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md).
## Description {#description}
`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:
1. Use the engine to create a table for consuming from specified path in S3 and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background.
Example:
``` sql
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'unordred',
keeper_path = '/clickhouse/s3queue/';
CREATE TABLE stats (name String, value UInt32)
ENGINE = MergeTree() ORDER BY name;
CREATE MATERIALIZED VIEW consumer TO stats
AS SELECT name, value FROM s3queue_engine_table;
SELECT * FROM stats ORDER BY name;
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Wildcards In Path {#wildcards-in-path}
`path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
:::note
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

View File

@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default.
- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default.

View File

@ -1723,6 +1723,34 @@ You can select data from a ClickHouse table and save them into some file in the
``` bash
$ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'"
```
### Using autogenerated schema {#using-autogenerated-capn-proto-schema}
If you don't have an external CapnProto schema for your data, you can still output/input data in CapnProto format using autogenerated schema.
For example:
```sql
SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1
```
In this case ClickHouse will autogenerate CapnProto schema according to the table structure using function [structureToCapnProtoSchema](../sql-reference/functions/other-functions.md#structure_to_capn_proto_schema) and will use this schema to serialize data in CapnProto format.
You can also read CapnProto file with autogenerated schema (in this case the file must be created using the same schema):
```bash
$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_capn_proto_use_autogenerated_schema=1 FORMAT CapnProto"
```
The setting [format_capn_proto_use_autogenerated_schema](../operations/settings/settings-formats.md#format_capn_proto_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
```sql
SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.capnp'
```
In this case autogenerated CapnProto schema will be saved in file `path/to/schema/schema.capnp`.
## Prometheus {#prometheus}
Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format).
@ -1861,6 +1889,33 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
### Using autogenerated schema {#using-autogenerated-protobuf-schema}
If you don't have an external Protobuf schema for your data, you can still output/input data in Protobuf format using autogenerated schema.
For example:
```sql
SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1
```
In this case ClickHouse will autogenerate Protobuf schema according to the table structure using function [structureToProtobufSchema](../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) and will use this schema to serialize data in Protobuf format.
You can also read Protobuf file with autogenerated schema (in this case the file must be created using the same schema):
```bash
$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_protobuf_use_autogenerated_schema=1 FORMAT Protobuf"
```
The setting [format_protobuf_use_autogenerated_schema](../operations/settings/settings-formats.md#format_protobuf_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
You can also save autogenerated schema in the file during input/output using setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
```sql
SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.proto'
```
In this case autogenerated Protobuf schema will be saved in file `path/to/schema/schema.capnp`.
## ProtobufSingle {#protobufsingle}
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.

View File

@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
### Usage examples

View File

@ -0,0 +1,26 @@
---
slug: /en/operations/optimizing-performance/profile-guided-optimization
sidebar_position: 54
sidebar_label: Profile Guided Optimization (PGO)
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
# Profile Guided Optimization
Profile-Guided Optimization (PGO) is a compiler optimization technique where a program is optimized based on the runtime profile.
According to the tests, PGO helps with achieving better performance for ClickHouse. According to the tests, we see improvements up to 15% in QPS on the ClickBench test suite. The more detailed results are available [here](https://pastebin.com/xbue3HMU). The performance benefits depend on your typical workload - you can get better or worse results.
More information about PGO in ClickHouse you can read in the corresponding GitHub [issue](https://github.com/ClickHouse/ClickHouse/issues/44567).
## How to build ClickHouse with PGO?
There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse.
1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`.
2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments.
3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step.
A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).
If you are going to collect a sample workload directly from a production environment, we recommend trying to use Sampling PGO.

View File

@ -2288,6 +2288,8 @@ This section contains the following parameters:
- `session_timeout_ms` — Maximum timeout for the client session in milliseconds.
- `operation_timeout_ms` — Maximum timeout for one operation in milliseconds.
- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional.
- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours.
- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours.
- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection.
* random - randomly selects one of ZooKeeper nodes.

View File

@ -327,3 +327,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all
Zero means unlimited.
Default value: 0.
## max_sessions_for_user {#max-sessions-per-user}
Maximum number of simultaneous sessions per authenticated user to the ClickHouse server.
Example:
``` xml
<profiles>
<single_session_profile>
<max_sessions_for_user>1</max_sessions_for_user>
</single_session_profile>
<two_sessions_profile>
<max_sessions_for_user>2</max_sessions_for_user>
</two_sessions_profile>
<unlimited_sessions_profile>
<max_sessions_for_user>0</max_sessions_for_user>
</unlimited_sessions_profile>
</profiles>
<users>
<!-- User Alice can connect to a ClickHouse server no more than once at a time. -->
<Alice>
<profile>single_session_user</profile>
</Alice>
<!-- User Bob can use 2 simultaneous sessions. -->
<Bob>
<profile>two_sessions_profile</profile>
</Bob>
<!-- User Charles can use arbitrarily many of simultaneous sessions. -->
<Charles>
<profile>unlimited_sessions_profile</profile>
</Charles>
</users>
```
Default value: 0 (Infinite count of simultaneous sessions).

View File

@ -321,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar
This parameter is useful when you are using formats that require a schema definition, such as [Capn Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
## output_format_schema {#output-format-schema}
The path to the file where the automatically generated schema will be saved in [Capn Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats.
## output_format_enable_streaming {#output_format_enable_streaming}
Enable streaming in output formats that support it.
@ -1330,6 +1334,11 @@ When serializing Nullable columns with Google wrappers, serialize default values
Disabled by default.
### format_protobuf_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set.
The schema is generated from ClickHouse table structure using function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema)
## Avro format settings {#avro-format-settings}
### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
@ -1626,6 +1635,11 @@ Possible values:
Default value: `'by_values'`.
### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set.
The schema is generated from ClickHouse table structure using function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema)
## MySQLDump format settings {#musqldump-format-settings}
### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name)

View File

@ -39,7 +39,7 @@ Example:
<max_threads>8</max_threads>
</default>
<!-- Settings for quries from the user interface -->
<!-- Settings for queries from the user interface -->
<web>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
@ -67,6 +67,8 @@ Example:
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<max_sessions_for_user>4</max_sessions_for_user>
<readonly>1</readonly>
</web>
</profiles>

View File

@ -3468,11 +3468,11 @@ Possible values:
Default value: `0`.
## disable_url_encoding {#disable_url_encoding}
## enable_url_encoding {#enable_url_encoding}
Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
Disabled by default.
Enabled by default.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
@ -4578,3 +4578,28 @@ Type: Int64
Default: 0
## precise_float_parsing {#precise_float_parsing}
Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
* If the value is `1`, then precise method is used. It is slower than fast method, but it always returns a number that is the closest machine representable number to the input.
* Otherwise, fast method is used (default). It usually returns the same value as precise, but in rare cases result may differ by one or two least significant digits.
Possible values: `0`, `1`.
Default value: `0`.
Example:
```sql
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
│ 1.7090999999999998 │ 15008753.000000002 │
└─────────────────────┴──────────────────────────┘
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
│ 1.7091 │ 15008753 │
└─────────────────────┴──────────────────────────┘
```

View File

@ -10,6 +10,7 @@ Columns:
- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.
You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp).

View File

@ -10,6 +10,7 @@ Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`.
You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).

View File

@ -48,7 +48,7 @@ Columns:
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0.
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.

View File

@ -51,3 +51,7 @@ keeper foo bar
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
- `get_stat [path]` -- Returns the node's stat (default `.`)
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stable_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)

View File

@ -140,8 +140,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)

View File

@ -2552,3 +2552,187 @@ Result:
This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.
## structureToCapnProtoSchema {#structure_to_capn_proto_schema}
Converts ClickHouse table structure to CapnProto schema.
**Syntax**
``` sql
structureToCapnProtoSchema(structure)
```
**Arguments**
- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
- `root_struct_name` — Name for root struct in CapnProto schema. Default value - `Message`;
**Returned value**
- CapnProto schema
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
```
Result:
``` text
@0xf96402dd754d0eb7;
struct Message
{
column1 @0 : Data;
column2 @1 : UInt32;
column3 @2 : List(Data);
}
```
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
```
Result:
``` text
@0xd1c8320fecad2b7f;
struct Message
{
struct Column1
{
union
{
value @0 : Data;
null @1 : Void;
}
}
column1 @0 : Column1;
struct Column2
{
element1 @0 : UInt32;
element2 @1 : List(Data);
}
column2 @1 : Column2;
struct Column3
{
struct Entry
{
key @0 : Data;
value @1 : Data;
}
entries @0 : List(Entry);
}
column3 @2 : Column3;
}
```
Query:
``` sql
SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
```
Result:
``` text
@0x96ab2d4ab133c6e1;
struct Root
{
column1 @0 : Data;
column2 @1 : UInt32;
}
```
## structureToProtobufSchema {#structure_to_protobuf_schema}
Converts ClickHouse table structure to Protobuf schema.
**Syntax**
``` sql
structureToProtobufSchema(structure)
```
**Arguments**
- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
- `root_message_name` — Name for root message in Protobuf schema. Default value - `Message`;
**Returned value**
- Protobuf schema
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Message
{
bytes column1 = 1;
uint32 column2 = 2;
repeated bytes column3 = 3;
}
```
Query:
``` sql
SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Message
{
bytes column1 = 1;
message Column2
{
uint32 element1 = 1;
repeated bytes element2 = 2;
}
Column2 column2 = 2;
map<string, bytes> column3 = 3;
}
```
Query:
``` sql
SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
```
Result:
``` text
syntax = "proto3";
message Root
{
bytes column1 = 1;
uint32 column2 = 2;
}
```

View File

@ -56,7 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default.
**See Also**

View File

@ -314,3 +314,40 @@ FORMAT Null;
При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом:
> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by max_partitions_per_insert_block setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).»
## max_sessions_for_user {#max-sessions-per-user}
Максимальное количество одновременных сессий на одного аутентифицированного пользователя.
Пример:
``` xml
<profiles>
<single_session_profile>
<max_sessions_for_user>1</max_sessions_for_user>
</single_session_profile>
<two_sessions_profile>
<max_sessions_for_user>2</max_sessions_for_user>
</two_sessions_profile>
<unlimited_sessions_profile>
<max_sessions_for_user>0</max_sessions_for_user>
</unlimited_sessions_profile>
</profiles>
<users>
<!-- Пользователь Alice может одновременно подключаться не
более одного раза к серверу ClickHouse. -->
<Alice>
<profile>single_session_profile</profile>
</Alice>
<!-- Пользователь Bob может использовать 2 одновременных сессии. -->
<Bob>
<profile>two_sessions_profile</profile>
</Bob>
<!-- Пользователь Charles может иметь любое количество одновременных сессий. -->
<Charles>
<profile>unlimited_sessions_profile</profile>
</Charles>
</users>
```
Значение по умолчанию: 0 (неограниченное количество сессий).

View File

@ -39,7 +39,7 @@ SET profile = 'web'
<max_threads>8</max_threads>
</default>
<!-- Settings for quries from the user interface -->
<!-- Settings for queries from the user interface -->
<web>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
@ -67,6 +67,7 @@ SET profile = 'web'
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<max_sessions_for_user>4</max_sessions_for_user>
<readonly>1</readonly>
</web>
</profiles>

View File

@ -4213,3 +4213,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
- Запрос: `SELECT * FROM file('sample.csv')`
Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`.
## precise_float_parsing {#precise_float_parsing}
Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md):
* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению.
* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры.
Возможные значения: `0`, `1`.
Значение по умолчанию: `0`.
Пример:
```sql
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
│ 1.7090999999999998 │ 15008753.000000002 │
└─────────────────────┴──────────────────────────┘
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
│ 1.7091 │ 15008753 │
└─────────────────────┴──────────────────────────┘
```

View File

@ -55,6 +55,9 @@ contents:
- src: clickhouse
dst: /usr/bin/clickhouse-keeper
type: symlink
- src: clickhouse
dst: /usr/bin/clickhouse-keeper-client
type: symlink
- src: root/usr/bin/clickhouse-report
dst: /usr/bin/clickhouse-report
- src: root/usr/bin/clickhouse-server

View File

@ -1,5 +1,6 @@
#include "Commands.h"
#include <queue>
#include "KeeperClient.h"
@ -24,8 +25,18 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
else
path = client->cwd;
for (const auto & child : client->zookeeper->getChildren(path))
std::cout << child << " ";
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
bool need_space = false;
for (const auto & child : children)
{
if (std::exchange(need_space, true))
std::cout << " ";
std::cout << child;
}
std::cout << "\n";
}
@ -130,6 +141,173 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
return true;
node->args.push_back(std::move(arg));
return true;
}
void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
Coordination::Stat stat;
String path;
if (!query->args.empty())
path = client->getAbsolutePath(query->args[0].safeGet<String>());
else
path = client->cwd;
client->zookeeper->get(path, &stat);
std::cout << "cZxid = " << stat.czxid << "\n";
std::cout << "mZxid = " << stat.mzxid << "\n";
std::cout << "pZxid = " << stat.pzxid << "\n";
std::cout << "ctime = " << stat.ctime << "\n";
std::cout << "mtime = " << stat.mtime << "\n";
std::cout << "version = " << stat.version << "\n";
std::cout << "cversion = " << stat.cversion << "\n";
std::cout << "aversion = " << stat.aversion << "\n";
std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n";
std::cout << "dataLength = " << stat.dataLength << "\n";
std::cout << "numChildren = " << stat.numChildren << "\n";
}
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
ASTPtr threshold;
if (!ParserUnsignedInteger{}.parse(pos, threshold, expected))
return false;
node->args.push_back(threshold->as<ASTLiteral &>().value);
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
return true;
}
void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto threshold = query->args[0].safeGet<UInt64>();
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
Coordination::Stat stat;
client->zookeeper->get(path, &stat);
if (stat.numChildren >= static_cast<Int32>(threshold))
{
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
return;
}
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
for (const auto & child : children)
{
auto next_query = *query;
next_query.args[1] = DB::Field(path / child);
execute(&next_query, client);
}
}
bool DeleteStableBackups::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;
}
void DeleteStableBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const
{
client->askConfirmation(
"You are going to delete all inactive backups in /clickhouse/backups.",
[client]
{
fs::path backup_root = "/clickhouse/backups";
auto backups = client->zookeeper->getChildren(backup_root);
std::sort(backups.begin(), backups.end());
for (const auto & child : backups)
{
auto backup_path = backup_root / child;
std::cout << "Found backup " << backup_path << ", checking if it's active\n";
String stage_path = backup_path / "stage";
auto stages = client->zookeeper->getChildren(stage_path);
bool is_active = false;
for (const auto & stage : stages)
{
if (startsWith(stage, "alive"))
{
is_active = true;
break;
}
}
if (is_active)
{
std::cout << "Backup " << backup_path << " is active, not going to delete\n";
continue;
}
std::cout << "Backup " << backup_path << " is not active, deleting it\n";
client->zookeeper->removeRecursive(backup_path);
}
});
}
bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
ASTPtr count;
if (ParserUnsignedInteger{}.parse(pos, count, expected))
node->args.push_back(count->as<ASTLiteral &>().value);
else
node->args.push_back(UInt64(10));
return true;
}
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
auto n = query->args[1].safeGet<UInt64>();
std::vector<std::tuple<Int32, String>> result;
std::queue<fs::path> queue;
queue.push(path);
while (!queue.empty())
{
auto next_path = queue.front();
queue.pop();
auto children = client->zookeeper->getChildren(next_path);
std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; });
auto response = client->zookeeper->get(children);
for (size_t i = 0; i < response.size(); ++i)
{
result.emplace_back(response[i].stat.numChildren, children[i]);
queue.push(children[i]);
}
}
std::sort(result.begin(), result.end(), std::greater());
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
}
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
@ -170,7 +348,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery
void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
{
for (const auto & pair : KeeperClient::commands)
std::cout << pair.second->getHelpMessage() << "\n";
std::cout << pair.second->generateHelpString() << "\n";
}
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const

View File

@ -21,6 +21,12 @@ public:
virtual String getName() const = 0;
virtual ~IKeeperClientCommand() = default;
String generateHelpString() const
{
return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName()));
}
};
using Command = std::shared_ptr<IKeeperClientCommand>;
@ -34,7 +40,7 @@ class LSCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }
String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; }
};
class CDCommand : public IKeeperClientCommand
@ -45,7 +51,7 @@ class CDCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
};
class SetCommand : public IKeeperClientCommand
@ -58,7 +64,7 @@ class SetCommand : public IKeeperClientCommand
String getHelpMessage() const override
{
return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
}
};
@ -70,7 +76,7 @@ class CreateCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "create <path> <value> -- Creates new node"; }
String getHelpMessage() const override { return "{} <path> <value> -- Creates new node"; }
};
class GetCommand : public IKeeperClientCommand
@ -81,9 +87,63 @@ class GetCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; }
};
class FindSuperNodes : public IKeeperClientCommand
{
String getName() const override { return "find_super_nodes"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} <threshold> [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)";
}
};
class DeleteStableBackups : public IKeeperClientCommand
{
String getName() const override { return "delete_stable_backups"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} -- Deletes ClickHouse nodes used for backups that are now inactive";
}
};
class FindBigFamily : public IKeeperClientCommand
{
String getName() const override { return "find_big_family"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)";
}
};
class RMCommand : public IKeeperClientCommand
{
String getName() const override { return "rm"; }
@ -92,7 +152,7 @@ class RMCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "remove <path> -- Remove the node"; }
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
};
class RMRCommand : public IKeeperClientCommand
@ -103,7 +163,7 @@ class RMRCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};
class HelpCommand : public IKeeperClientCommand
@ -114,7 +174,7 @@ class HelpCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "help -- Prints this message"; }
String getHelpMessage() const override { return "{} -- Prints this message"; }
};
class FourLetterWordCommand : public IKeeperClientCommand
@ -125,7 +185,7 @@ class FourLetterWordCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }
String getHelpMessage() const override { return "{} <command> -- Executes four-letter-word command"; }
};
}

View File

@ -177,6 +177,10 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<SetCommand>(),
std::make_shared<CreateCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStableBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<HelpCommand>(),

View File

@ -58,6 +58,7 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
String command_name(pos->begin, pos->end);
std::transform(command_name.begin(), command_name.end(), command_name.begin(), [](unsigned char c) { return std::tolower(c); });
Command command;
auto iter = KeeperClient::commands.find(command_name);

View File

@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
)
if (OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
target_link_libraries(clickhouse-library-bridge PRIVATE

View File

@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
validateODBCConnectionString.cpp
)
if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-odbc-bridge PRIVATE

View File

@ -1691,17 +1691,26 @@ try
global_context->initializeTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0);
if (total_memory_profiler_step)
if (server_settings.total_memory_profiler_step)
{
total_memory_tracker.setProfilerStep(total_memory_profiler_step);
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
}
double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
if (total_memory_tracker_sample_probability > 0.0)
if (server_settings.total_memory_tracker_sample_probability > 0.0)
{
total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
}
if (server_settings.total_memory_profiler_sample_min_allocation_size)
{
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
}
if (server_settings.total_memory_profiler_sample_max_allocation_size)
{
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
}
#endif
@ -2036,27 +2045,26 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
std::string prefix = "protocols." + protocol + ".";
std::string port_name = prefix + "port";
std::string description {"<undefined> protocol"};
if (config.has(prefix + "description"))
description = config.getString(prefix + "description");
if (!config.has(prefix + "port"))
continue;
if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name))
continue;
std::vector<std::string> hosts;
if (config.has("protocols." + protocol + ".host"))
hosts.push_back(config.getString("protocols." + protocol + ".host"));
if (config.has(prefix + "host"))
hosts.push_back(config.getString(prefix + "host"));
else
hosts = listen_hosts;
for (const auto & host : hosts)
{
std::string conf_name = "protocols." + protocol;
std::string prefix = conf_name + ".";
if (!config.has(prefix + "port"))
continue;
std::string description {"<undefined> protocol"};
if (config.has(prefix + "description"))
description = config.getString(prefix + "description");
std::string port_name = prefix + "port";
bool is_secure = false;
auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure);

View File

@ -328,9 +328,6 @@ void ContextAccess::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> &
enabled_row_policies = access_control->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles);
enabled_quota = access_control->getEnabledQuota(
*params.user_id, user_name, roles_info->enabled_roles, params.address, params.forwarded_address, params.quota_key);
enabled_settings = access_control->getEnabledSettings(
*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles);
@ -416,19 +413,32 @@ RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, co
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
if (enabled_quota)
return enabled_quota;
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
if (!enabled_quota)
{
if (roles_info)
{
enabled_quota = access_control->getEnabledQuota(*params.user_id,
user_name,
roles_info->enabled_roles,
params.address,
params.forwarded_address,
params.quota_key);
}
else
{
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
}
}
return enabled_quota;
}
std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
{
std::lock_guard lock{mutex};
if (enabled_quota)
return enabled_quota->getUsage();
return {};
return getQuota()->getUsage();
}

View File

@ -1,4 +1,5 @@
#include <string_view>
#include <unordered_map>
#include <Access/SettingsConstraints.h>
#include <Access/resolveSetting.h>
#include <Access/AccessControl.h>
@ -6,6 +7,7 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Common/FieldVisitorToString.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/SettingSource.h>
#include <IO/WriteHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/range/algorithm_ext/erase.hpp>
@ -20,6 +22,39 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
}
namespace
{
struct SettingSourceRestrictions
{
constexpr SettingSourceRestrictions() { allowed_sources.set(); }
constexpr SettingSourceRestrictions(std::initializer_list<SettingSource> allowed_sources_)
{
for (auto allowed_source : allowed_sources_)
setSourceAllowed(allowed_source, true);
}
constexpr bool isSourceAllowed(SettingSource source) { return allowed_sources[source]; }
constexpr void setSourceAllowed(SettingSource source, bool allowed) { allowed_sources[source] = allowed; }
std::bitset<SettingSource::COUNT> allowed_sources;
};
const std::unordered_map<std::string_view, SettingSourceRestrictions> SETTINGS_SOURCE_RESTRICTIONS = {
{"max_sessions_for_user", {SettingSource::PROFILE}},
};
SettingSourceRestrictions getSettingSourceRestrictions(std::string_view name)
{
auto settingConstraintIter = SETTINGS_SOURCE_RESTRICTIONS.find(name);
if (settingConstraintIter != SETTINGS_SOURCE_RESTRICTIONS.end())
return settingConstraintIter->second;
else
return SettingSourceRestrictions(); // allows everything
}
}
SettingsConstraints::SettingsConstraints(const AccessControl & access_control_) : access_control(&access_control_)
{
}
@ -98,7 +133,7 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const
{
for (const auto & element : profile_elements)
{
@ -108,19 +143,19 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
if (element.value)
{
SettingChange value(element.setting_name, *element.value);
check(current_settings, value);
check(current_settings, value, source);
}
if (element.min_value)
{
SettingChange value(element.setting_name, *element.min_value);
check(current_settings, value);
check(current_settings, value, source);
}
if (element.max_value)
{
SettingChange value(element.setting_name, *element.max_value);
check(current_settings, value);
check(current_settings, value, source);
}
SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE;
@ -142,24 +177,24 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
}
}
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change, SettingSource source) const
{
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source);
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes) const
void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const
{
for (const auto & change : changes)
check(current_settings, change);
check(current_settings, change, source);
}
void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes) const
void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const
{
boost::range::remove_erase_if(
changes,
[&](SettingChange & change) -> bool
{
return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);
return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source);
});
}
@ -174,13 +209,13 @@ void SettingsConstraints::check(const MergeTreeSettings & current_settings, cons
check(current_settings, change);
}
void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes) const
void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const
{
boost::range::remove_erase_if(
changes,
[&](SettingChange & change) -> bool
{
return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION);
return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION, source);
});
}
@ -215,7 +250,10 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel
return true;
}
bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const
bool SettingsConstraints::checkImpl(const Settings & current_settings,
SettingChange & change,
ReactionOnViolation reaction,
SettingSource source) const
{
std::string_view setting_name = Settings::Traits::resolveName(change.name);
@ -247,7 +285,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION))
return false;
return getChecker(current_settings, setting_name).check(change, new_value, reaction);
return getChecker(current_settings, setting_name).check(change, new_value, reaction, source);
}
bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const
@ -255,10 +293,13 @@ bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings,
Field new_value;
if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION))
return false;
return getMergeTreeChecker(change.name).check(change, new_value, reaction);
return getMergeTreeChecker(change.name).check(change, new_value, reaction, SettingSource::QUERY);
}
bool SettingsConstraints::Checker::check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const
bool SettingsConstraints::Checker::check(SettingChange & change,
const Field & new_value,
ReactionOnViolation reaction,
SettingSource source) const
{
if (!explain.empty())
{
@ -326,6 +367,14 @@ bool SettingsConstraints::Checker::check(SettingChange & change, const Field & n
change.value = max_value;
}
if (!getSettingSourceRestrictions(setting_name).isSourceAllowed(source))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception(ErrorCodes::READONLY, "Setting {} is not allowed to be set by {}", setting_name, toString(source));
else
return false;
}
return true;
}

View File

@ -2,6 +2,7 @@
#include <Access/SettingsProfileElement.h>
#include <Common/SettingsChanges.h>
#include <Common/SettingSource.h>
#include <unordered_map>
namespace Poco::Util
@ -73,17 +74,18 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
void check(const Settings & current_settings, SettingsChanges & changes) const;
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const SettingChange & change, SettingSource source) const;
void check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
/// Checks whether `change` violates these constraints and throws an exception if so. (setting short name is expected inside `changes`)
void check(const MergeTreeSettings & current_settings, const SettingChange & change) const;
void check(const MergeTreeSettings & current_settings, const SettingsChanges & changes) const;
/// Checks whether `change` violates these and clamps the `change` if so.
void clamp(const Settings & current_settings, SettingsChanges & changes) const;
void clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
friend bool operator ==(const SettingsConstraints & left, const SettingsConstraints & right);
friend bool operator !=(const SettingsConstraints & left, const SettingsConstraints & right) { return !(left == right); }
@ -133,7 +135,10 @@ private:
{}
// Perform checking
bool check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const;
bool check(SettingChange & change,
const Field & new_value,
ReactionOnViolation reaction,
SettingSource source) const;
};
struct StringHash
@ -145,7 +150,11 @@ private:
}
};
bool checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
bool checkImpl(const Settings & current_settings,
SettingChange & change,
ReactionOnViolation reaction,
SettingSource source) const;
bool checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
Checker getChecker(const Settings & current_settings, std::string_view setting_name) const;

View File

@ -7,6 +7,7 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/UnionNode.h>
#include <Interpreters/Context.h>
@ -90,26 +91,25 @@ private:
template <typename Derived>
using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor<Derived, true /*const_visitor*/>;
/** Same as InDepthQueryTreeVisitor and additionally keeps track of current scope context.
/** Same as InDepthQueryTreeVisitor (but has a different interface) and additionally keeps track of current scope context.
* This can be useful if your visitor has special logic that depends on current scope context.
*
* To specify behavior of the visitor you can implement following methods in derived class:
* 1. needChildVisit This methods allows to skip subtree.
* 2. enterImpl This method is called before children are processed.
* 3. leaveImpl This method is called after children are processed.
*/
template <typename Derived, bool const_visitor = false>
class InDepthQueryTreeVisitorWithContext
{
public:
using VisitQueryTreeNodeType = std::conditional_t<const_visitor, const QueryTreeNodePtr, QueryTreeNodePtr>;
using VisitQueryTreeNodeType = QueryTreeNodePtr;
explicit InDepthQueryTreeVisitorWithContext(ContextPtr context, size_t initial_subquery_depth = 0)
: current_context(std::move(context))
, subquery_depth(initial_subquery_depth)
{}
/// Return true if visitor should traverse tree top to bottom, false otherwise
bool shouldTraverseTopToBottom() const
{
return true;
}
/// Return true if visitor should visit child, false otherwise
bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]])
{
@ -146,18 +146,16 @@ public:
++subquery_depth;
bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom();
if (!traverse_top_to_bottom)
visitChildren(query_tree_node);
getDerived().enterImpl(query_tree_node);
getDerived().visitImpl(query_tree_node);
if (traverse_top_to_bottom)
visitChildren(query_tree_node);
visitChildren(query_tree_node);
getDerived().leaveImpl(query_tree_node);
}
void enterImpl(VisitQueryTreeNodeType & node [[maybe_unused]])
{}
void leaveImpl(VisitQueryTreeNodeType & node [[maybe_unused]])
{}
private:
@ -171,17 +169,31 @@ private:
return *static_cast<Derived *>(this);
}
bool shouldSkipSubtree(
VisitQueryTreeNodeType & parent,
VisitQueryTreeNodeType & child,
size_t subtree_index)
{
bool need_visit_child = getDerived().needChildVisit(parent, child);
if (!need_visit_child)
return true;
if (auto * table_function_node = parent->as<TableFunctionNode>())
{
const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes();
return std::find(unresolved_indexes.begin(), unresolved_indexes.end(), subtree_index) != unresolved_indexes.end();
}
return false;
}
void visitChildren(VisitQueryTreeNodeType & expression)
{
size_t index = 0;
for (auto & child : expression->getChildren())
{
if (!child)
continue;
bool need_visit_child = getDerived().needChildVisit(expression, child);
if (need_visit_child)
if (child && !shouldSkipSubtree(expression, child, index))
visit(child);
++index;
}
}
@ -189,50 +201,4 @@ private:
size_t subquery_depth = 0;
};
template <typename Derived>
using ConstInDepthQueryTreeVisitorWithContext = InDepthQueryTreeVisitorWithContext<Derived, true /*const_visitor*/>;
/** Visitor that use another visitor to visit node only if condition for visiting node is true.
* For example, your visitor need to visit only query tree nodes or union nodes.
*
* Condition interface:
* struct Condition
* {
* bool operator()(VisitQueryTreeNodeType & node)
* {
* return shouldNestedVisitorVisitNode(node);
* }
* }
*/
template <typename Visitor, typename Condition, bool const_visitor = false>
class InDepthQueryTreeConditionalVisitor : public InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>
{
public:
using Base = InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>;
using VisitQueryTreeNodeType = typename Base::VisitQueryTreeNodeType;
explicit InDepthQueryTreeConditionalVisitor(Visitor & visitor_, Condition & condition_)
: visitor(visitor_)
, condition(condition_)
{
}
bool shouldTraverseTopToBottom() const
{
return visitor.shouldTraverseTopToBottom();
}
void visitImpl(VisitQueryTreeNodeType & query_tree_node)
{
if (condition(query_tree_node))
visitor.visit(query_tree_node);
}
Visitor & visitor;
Condition & condition;
};
template <typename Visitor, typename Condition>
using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor<Visitor, Condition, true /*const_visitor*/>;
}

View File

@ -51,13 +51,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
using Base::Base;
/// Traverse tree bottom to top
static bool shouldTraverseTopToBottom()
{
return false;
}
void visitImpl(QueryTreeNodePtr & node)
void leaveImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
return;

View File

@ -22,7 +22,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_array_exists_to_has)
return;

View File

@ -20,7 +20,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().final)
return;

View File

@ -50,7 +50,7 @@ public:
&& settings.max_hyperscan_regexp_total_length == 0;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "or")

View File

@ -688,7 +688,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ConvertQueryToCNFVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * query_node = node->as<QueryNode>();
if (!query_node)

View File

@ -22,7 +22,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().count_distinct_optimization)
return;

View File

@ -193,7 +193,7 @@ public:
return true;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!isEnabled())
return;

View File

@ -29,7 +29,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
if (!getSettings().optimize_functions_to_subcolumns)
return;

View File

@ -37,7 +37,7 @@ public:
, names_to_collect(names_to_collect_)
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_syntax_fuse_functions)
return;

View File

@ -46,7 +46,7 @@ public:
{
}
void visitImpl(const QueryTreeNodePtr & node)
void enterImpl(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "grouping")

View File

@ -23,7 +23,7 @@ public:
, multi_if_function_ptr(std::move(multi_if_function_ptr_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_if_chain_to_multiif)
return;

View File

@ -113,7 +113,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ConvertStringsToEnumVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_if_transform_strings_to_enum)
return;

View File

@ -19,7 +19,7 @@ public:
: Base(std::move(context))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();

View File

@ -21,7 +21,7 @@ public:
, if_function_ptr(std::move(if_function_ptr_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_multiif_to_if)
return;

View File

@ -20,7 +20,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<NormalizeCountVariantsVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_normalize_count_variants)
return;

View File

@ -0,0 +1,221 @@
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
#include <Functions/FunctionFactory.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
class OptimizeDateOrDateTimeConverterWithPreimageVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeDateOrDateTimeConverterWithPreimageVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<OptimizeDateOrDateTimeConverterWithPreimageVisitor>;
explicit OptimizeDateOrDateTimeConverterWithPreimageVisitor(ContextPtr context)
: Base(std::move(context))
{}
static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*child*/)
{
const static std::unordered_set<String> relations = {
"equals",
"notEquals",
"less",
"greater",
"lessOrEquals",
"greaterOrEquals",
};
if (const auto * function = node->as<FunctionNode>())
{
return !relations.contains(function->getFunctionName());
}
return true;
}
void enterImpl(QueryTreeNodePtr & node) const
{
const static std::unordered_map<String, String> swap_relations = {
{"equals", "equals"},
{"notEquals", "notEquals"},
{"less", "greater"},
{"greater", "less"},
{"lessOrEquals", "greaterOrEquals"},
{"greaterOrEquals", "lessOrEquals"},
};
const auto * function = node->as<FunctionNode>();
if (!function || !swap_relations.contains(function->getFunctionName())) return;
if (function->getArguments().getNodes().size() != 2) return;
size_t func_id = function->getArguments().getNodes().size();
for (size_t i = 0; i < function->getArguments().getNodes().size(); i++)
{
if (const auto * func = function->getArguments().getNodes()[i]->as<FunctionNode>())
{
func_id = i;
}
}
if (func_id == function->getArguments().getNodes().size()) return;
size_t literal_id = 1 - func_id;
const auto * literal = function->getArguments().getNodes()[literal_id]->as<ConstantNode>();
if (!literal || literal->getValue().getType() != Field::Types::UInt64) return;
String comparator = literal_id > func_id ? function->getFunctionName(): swap_relations.at(function->getFunctionName());
const auto * func_node = function->getArguments().getNodes()[func_id]->as<FunctionNode>();
/// Currently we only handle single-argument functions.
if (!func_node || func_node->getArguments().getNodes().size() != 1) return;
const auto * column_id = func_node->getArguments().getNodes()[0]->as<ColumnNode>();
if (!column_id) return;
const auto * column_type = column_id->getColumnType().get();
if (!isDateOrDate32(column_type) && !isDateTime(column_type) && !isDateTime64(column_type)) return;
const auto & converter = FunctionFactory::instance().tryGet(func_node->getFunctionName(), getContext());
if (!converter) return;
ColumnsWithTypeAndName args;
args.emplace_back(column_id->getColumnType(), "tmp");
auto converter_base = converter->build(args);
if (!converter_base || !converter_base->hasInformationAboutPreimage()) return;
auto preimage_range = converter_base->getPreimage(*(column_id->getColumnType()), literal->getValue());
if (!preimage_range) return;
const auto new_node = generateOptimizedDateFilter(comparator, *column_id, *preimage_range);
if (!new_node) return;
node = new_node;
}
private:
QueryTreeNodePtr generateOptimizedDateFilter(const String & comparator, const ColumnNode & column_node, const std::pair<Field, Field>& range) const
{
const DateLUTImpl & date_lut = DateLUT::instance("UTC");
String start_date_or_date_time;
String end_date_or_date_time;
if (isDateOrDate32(column_node.getColumnType().get()))
{
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
}
else if (isDateTime(column_node.getColumnType().get()) || isDateTime64(column_node.getColumnType().get()))
{
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
}
else [[unlikely]] return {};
if (comparator == "equals")
{
const auto lhs = std::make_shared<FunctionNode>("greaterOrEquals");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("less");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("and");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "notEquals")
{
const auto lhs = std::make_shared<FunctionNode>("less");
lhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
lhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("greaterOrEquals");
rhs->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
rhs->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
const auto new_date_filter = std::make_shared<FunctionNode>("or");
new_date_filter->getArguments().getNodes() = {lhs, rhs};
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "greater")
{
const auto new_date_filter = std::make_shared<FunctionNode>("greaterOrEquals");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "lessOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>("less");
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(end_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else if (comparator == "less" || comparator == "greaterOrEquals")
{
const auto new_date_filter = std::make_shared<FunctionNode>(comparator);
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ColumnNode>(column_node.getColumn(), column_node.getColumnSource()));
new_date_filter->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(start_date_or_date_time));
resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName());
return new_date_filter;
}
else [[unlikely]]
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}",
comparator);
}
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
}
void OptimizeDateOrDateTimeConverterWithPreimagePass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
OptimizeDateOrDateTimeConverterWithPreimageVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,24 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Replace predicate having Date/DateTime converters with their preimages to improve performance.
* Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01'
* Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'.
* The similar optimization also applies to other converters.
*/
class OptimizeDateOrDateTimeConverterWithPreimagePass final : public IQueryTreePass
{
public:
String getName() override { return "OptimizeDateOrDateTimeConverterWithPreimagePass"; }
String getDescription() override { return "Replace predicate having Date/DateTime converters with their preimages"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -26,7 +26,7 @@ public:
return !child->as<FunctionNode>();
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_group_by_function_keys)
return;

View File

@ -28,7 +28,7 @@ public:
return true;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_redundant_functions_in_order_by)
return;

View File

@ -6451,7 +6451,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_function_ptr->parseArguments(table_function_ast, scope_context);
auto table_function_storage = scope_context->getQueryContext()->executeTableFunction(table_function_ast, table_function_ptr);
table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context);
table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context, std::move(skip_analysis_arguments_indexes));
}
/// Resolve array join node in scope
@ -6494,55 +6494,69 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
auto result_type = array_join_expression->getResultType();
bool is_array_type = isArray(result_type);
bool is_map_type = isMap(result_type);
if (!is_array_type && !is_map_type)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_expression->formatASTForErrorMessage(),
result_type->getName(),
scope.scope_node->formatASTForErrorMessage());
if (is_map_type)
result_type = assert_cast<const DataTypeMap &>(*result_type).getNestedType();
result_type = assert_cast<const DataTypeArray &>(*result_type).getNestedType();
String array_join_column_name;
if (!array_join_expression_alias.empty())
auto process_array_join_expression = [&](QueryTreeNodePtr & expression)
{
array_join_column_name = array_join_expression_alias;
}
else if (auto * array_join_expression_inner_column = array_join_expression->as<ColumnNode>())
auto result_type = expression->getResultType();
bool is_array_type = isArray(result_type);
bool is_map_type = isMap(result_type);
if (!is_array_type && !is_map_type)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
expression->formatASTForErrorMessage(),
result_type->getName(),
scope.scope_node->formatASTForErrorMessage());
if (is_map_type)
result_type = assert_cast<const DataTypeMap &>(*result_type).getNestedType();
result_type = assert_cast<const DataTypeArray &>(*result_type).getNestedType();
String array_join_column_name;
if (!array_join_expression_alias.empty())
{
array_join_column_name = array_join_expression_alias;
}
else if (auto * array_join_expression_inner_column = array_join_expression->as<ColumnNode>())
{
array_join_column_name = array_join_expression_inner_column->getColumnName();
}
else if (!identifier_full_name.empty())
{
array_join_column_name = identifier_full_name;
}
else
{
array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter);
++array_join_expressions_counter;
}
if (array_join_column_names.contains(array_join_column_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"ARRAY JOIN {} multiple columns with name {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_column_name,
scope.scope_node->formatASTForErrorMessage());
array_join_column_names.emplace(array_join_column_name);
NameAndTypePair array_join_column(array_join_column_name, result_type);
auto array_join_column_node = std::make_shared<ColumnNode>(std::move(array_join_column), expression, array_join_node);
array_join_column_node->setAlias(array_join_expression_alias);
array_join_column_expressions.push_back(std::move(array_join_column_node));
};
// Support ARRAY JOIN COLUMNS(...). COLUMNS transformer is resolved to list of columns.
if (auto * columns_list = array_join_expression->as<ListNode>())
{
array_join_column_name = array_join_expression_inner_column->getColumnName();
}
else if (!identifier_full_name.empty())
{
array_join_column_name = identifier_full_name;
for (auto & array_join_subexpression : columns_list->getNodes())
process_array_join_expression(array_join_subexpression);
}
else
{
array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter);
++array_join_expressions_counter;
process_array_join_expression(array_join_expression);
}
if (array_join_column_names.contains(array_join_column_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"ARRAY JOIN {} multiple columns with name {}. In scope {}",
array_join_node_typed.formatASTForErrorMessage(),
array_join_column_name,
scope.scope_node->formatASTForErrorMessage());
array_join_column_names.emplace(array_join_column_name);
NameAndTypePair array_join_column(array_join_column_name, result_type);
auto array_join_column_node = std::make_shared<ColumnNode>(std::move(array_join_column), array_join_expression, array_join_node);
array_join_column_node->setAlias(array_join_expression_alias);
array_join_column_expressions.push_back(std::move(array_join_column_node));
}
/** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because
@ -6554,11 +6568,9 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
* And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression
* with type after ARRAY JOIN.
*/
for (size_t i = 0; i < array_join_nodes_size; ++i)
array_join_nodes = std::move(array_join_column_expressions);
for (auto & array_join_column_expression : array_join_nodes)
{
auto & array_join_column_expression = array_join_nodes[i];
array_join_column_expression = std::move(array_join_column_expressions[i]);
auto it = scope.alias_name_to_expression_node.find(array_join_column_expression->getAlias());
if (it != scope.alias_name_to_expression_node.end())
{

View File

@ -26,7 +26,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_aggregate_function_with_if)
return;

View File

@ -24,7 +24,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<ShardNumColumnToFunctionVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)

View File

@ -26,7 +26,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<SumIfToCountIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_sum_if_to_count_if)
return;

View File

@ -31,7 +31,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<UniqInjectiveFunctionsEliminationVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_injective_functions_inside_uniq)
return;

View File

@ -42,6 +42,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
namespace DB
{
@ -278,6 +279,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}
}

View File

@ -27,12 +27,13 @@ TableFunctionNode::TableFunctionNode(String table_function_name_)
children[arguments_child_index] = std::make_shared<ListNode>();
}
void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context)
void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_)
{
table_function = std::move(table_function_value);
storage = std::move(storage_value);
storage_id = storage->getStorageID();
storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
unresolved_arguments_indexes = std::move(unresolved_arguments_indexes_);
}
const StorageID & TableFunctionNode::getStorageID() const
@ -132,6 +133,7 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
result->storage_snapshot = storage_snapshot;
result->table_expression_modifiers = table_expression_modifiers;
result->settings_changes = settings_changes;
result->unresolved_arguments_indexes = unresolved_arguments_indexes;
return result;
}

View File

@ -98,7 +98,7 @@ public:
}
/// Resolve table function with table function, storage and context
void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context);
void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_);
/// Get storage id, throws exception if function node is not resolved
const StorageID & getStorageID() const;
@ -106,6 +106,11 @@ public:
/// Get storage snapshot, throws exception if function node is not resolved
const StorageSnapshotPtr & getStorageSnapshot() const;
const std::vector<size_t> & getUnresolvedArgumentIndexes() const
{
return unresolved_arguments_indexes;
}
/// Return true if table function node has table expression modifiers, false otherwise
bool hasTableExpressionModifiers() const
{
@ -164,6 +169,7 @@ private:
StoragePtr storage;
StorageID storage_id;
StorageSnapshotPtr storage_snapshot;
std::vector<size_t> unresolved_arguments_indexes;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;

View File

@ -30,6 +30,7 @@ public:
String compression_method;
int compression_level = -1;
String password;
String s3_storage_class;
ContextPtr context;
bool is_internal_backup = false;
std::shared_ptr<IBackupCoordination> backup_coordination;

View File

@ -88,7 +88,7 @@ namespace
request.SetMaxKeys(1);
auto outcome = client.ListObjects(request);
if (!outcome.IsSuccess())
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
return outcome.GetResult().GetContents();
}
@ -178,7 +178,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
@ -188,6 +188,7 @@ BackupWriterS3::BackupWriterS3(
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
request_settings.setStorageClassName(storage_class_name);
}
void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
@ -271,7 +272,7 @@ void BackupWriterS3::removeFile(const String & file_name)
request.SetKey(fs::path(s3_uri.key) / file_name);
auto outcome = client->DeleteObject(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
void BackupWriterS3::removeFiles(const Strings & file_names)
@ -329,7 +330,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names)
auto outcome = client->DeleteObjects(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
}

View File

@ -38,7 +38,7 @@ private:
class BackupWriterS3 : public BackupWriterDefault
{
public:
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_);
~BackupWriterS3() override;
bool fileExists(const String & file_name) override;

View File

@ -21,6 +21,7 @@ namespace ErrorCodes
M(String, id) \
M(String, compression_method) \
M(String, password) \
M(String, s3_storage_class) \
M(Bool, structure_only) \
M(Bool, async) \
M(Bool, decrypt_files_from_encrypted_disks) \

View File

@ -25,6 +25,9 @@ struct BackupSettings
/// Password used to encrypt the backup.
String password;
/// S3 storage class.
String s3_storage_class = "";
/// If this is set to true then only create queries will be written to backup,
/// without the data of tables.
bool structure_only = false;

View File

@ -344,6 +344,7 @@ void BackupsWorker::doBackup(
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.s3_storage_class = backup_settings.s3_storage_class;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_settings.backup_uuid;

View File

@ -112,7 +112,7 @@ void registerBackupEngineS3(BackupFactory & factory)
}
else
{
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context);
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.s3_storage_class, params.context);
return std::make_unique<BackupImpl>(
backup_name_for_logging,
archive_params,

View File

@ -248,6 +248,7 @@ add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)

Some files were not shown because too many files have changed in this diff Show More