Merge branch 'master' into jepsen_for_nukeeper

This commit is contained in:
alesapin 2021-03-25 14:34:00 +03:00
commit 4c28c25a75
150 changed files with 2274 additions and 925 deletions

2
.gitmodules vendored
View File

@ -93,7 +93,7 @@
url = https://github.com/ClickHouse-Extras/libunwind.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/ClickHouse-Extras/simdjson.git
url = https://github.com/simdjson/simdjson.git
[submodule "contrib/rapidjson"]
path = contrib/rapidjson
url = https://github.com/ClickHouse-Extras/rapidjson

View File

@ -8,7 +8,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 3190d66a49059092a1753dc35595923debfc1698
Subproject commit 95b4870e20be5f97d9dcf63b23b1c6f520c366c1

View File

@ -1,4 +1,5 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions
etc/security/limits.d/clickhouse.conf

View File

@ -18,6 +18,7 @@ RUN apt-get update \
clickhouse-client=$version \
clickhouse-common-static=$version \
locales \
tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean

View File

@ -4,14 +4,26 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
&& apt-get install \
apt-transport-https \
apt-utils \
ca-certificates \
dnsutils \
gnupg \
iputils-ping \
lsb-release \
wget \
--yes --no-install-recommends --verbose-versions \
&& cat /etc/resolv.conf \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& nslookup -debug apt.llvm.org \
&& ping -c1 apt.llvm.org \
&& wget -nv --retry-connrefused --tries=10 -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
# initial packages
@ -24,7 +36,10 @@ RUN apt-get update \
software-properties-common \
--yes --no-install-recommends
RUN apt-get update \
RUN cat /etc/resolv.conf \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& nslookup -debug apt.llvm.org \
&& apt-get update \
&& apt-get install \
bash \
cmake \

View File

@ -32,6 +32,7 @@ RUN groupadd -r clickhouse --gid=101 \
clickhouse-server=$version \
locales \
wget \
tzdata \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -21,7 +21,9 @@ RUN addgroup -S -g 101 clickhouse \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec bash \
&& apk add --no-cache su-exec bash tzdata \
&& cp /usr/share/zoneinfo/UTC /etc/localtime \
&& echo "UTC" > /etc/timezone \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container

View File

@ -4,8 +4,9 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
&& apt-get install apt-utils ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
@ -31,7 +32,8 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
RUN apt-get update \
RUN echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& apt-get update \
&& apt-get install \
clang-${LLVM_VERSION} \
debhelper \

View File

@ -26,6 +26,7 @@ def process_result(result_folder):
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
summary.append((test, "FAIL"))
status = 'failure'
else:
summary.append((test, "OK"))

View File

@ -11,7 +11,7 @@ service clickhouse-server start && sleep 5
cd /sqlancer/sqlancer-master
export TIMEOUT=60
export TIMEOUT=300
export NUM_QUERIES=1000
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err

View File

@ -0,0 +1,284 @@
# CMake in ClickHouse
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
```cmake
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake files types
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
4. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`).
## List of CMake flags
* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py).
* The flag name is a link to its position in the code.
* If an option's default value is itself an option, it's also a link to its position in this list.
### ClickHouse modes
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-clickhouse-all"></a>[`ENABLE_CLICKHOUSE_ALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L8) | `ON` | Enable all ClickHouse modes by default | The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
| <a name="enable-clickhouse-benchmark"></a>[`ENABLE_CLICKHOUSE_BENCHMARK`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L18) | `ENABLE_CLICKHOUSE_ALL` | Queries benchmarking mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/ |
| <a name="enable-clickhouse-client"></a>[`ENABLE_CLICKHOUSE_CLIENT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L11) | `ENABLE_CLICKHOUSE_ALL` | Client mode (interactive tui/shell that connects to the server) | |
| <a name="enable-clickhouse-compressor"></a>[`ENABLE_CLICKHOUSE_COMPRESSOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L23) | `ENABLE_CLICKHOUSE_ALL` | Data compressor and decompressor | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/ |
| <a name="enable-clickhouse-copier"></a>[`ENABLE_CLICKHOUSE_COPIER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L26) | `ENABLE_CLICKHOUSE_ALL` | Inter-cluster data copying mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/ |
| <a name="enable-clickhouse-extract-from-config"></a>[`ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20) | `ENABLE_CLICKHOUSE_ALL` | Configs processor (extract values etc.) | |
| <a name="enable-clickhouse-format"></a>[`ENABLE_CLICKHOUSE_FORMAT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28) | `ENABLE_CLICKHOUSE_ALL` | Queries pretty-printer and formatter with syntax highlighting | |
| <a name="enable-clickhouse-git-import"></a>[`ENABLE_CLICKHOUSE_GIT_IMPORT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L40) | `ENABLE_CLICKHOUSE_ALL` | A tool to analyze Git repositories | https://presentations.clickhouse.tech/matemarketing_2020/ |
| <a name="enable-clickhouse-install"></a>[`ENABLE_CLICKHOUSE_INSTALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L44) | `OFF` | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
| <a name="enable-clickhouse-local"></a>[`ENABLE_CLICKHOUSE_LOCAL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L15) | `ENABLE_CLICKHOUSE_ALL` | Local files fast processing mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/ |
| <a name="enable-clickhouse-obfuscator"></a>[`ENABLE_CLICKHOUSE_OBFUSCATOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L32) | `ENABLE_CLICKHOUSE_ALL` | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/ |
| <a name="enable-clickhouse-odbc-bridge"></a>[`ENABLE_CLICKHOUSE_ODBC_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L36) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to ODBC driver | https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/ |
| <a name="enable-clickhouse-server"></a>[`ENABLE_CLICKHOUSE_SERVER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10) | `ENABLE_CLICKHOUSE_ALL` | Server mode (main mode) | |
### External libraries
Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-amqpcpp"></a>[`ENABLE_AMQPCPP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/amqpcpp.cmake#L1) | `ENABLE_LIBRARIES` | Enable AMQP-CPP | |
| <a name="enable-avro"></a>[`ENABLE_AVRO`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L2) | `ENABLE_LIBRARIES` | Enable Avro | Needed when using Apache Avro serialization format |
| <a name="enable-base"></a>[`ENABLE_BASE64`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/base64.cmake#L1) | `ENABLE_LIBRARIES` | Enable base64 | |
| <a name="enable-brotli"></a>[`ENABLE_BROTLI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L1) | `ENABLE_LIBRARIES` | Enable brotli | |
| <a name="enable-capnp"></a>[`ENABLE_CAPNP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cap'n Proto | |
| <a name="enable-cassandra"></a>[`ENABLE_CASSANDRA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cassandra.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cassandra | |
| <a name="enable-ccache"></a>[`ENABLE_CCACHE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ccache.cmake#L22) | `ENABLE_CCACHE_BY_DEFAULT` | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
| <a name="enable-clang-tidy"></a>[`ENABLE_CLANG_TIDY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2) | `OFF` | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
| <a name="enable-curl"></a>[`ENABLE_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L1) | `ENABLE_LIBRARIES` | Enable curl | |
| <a name="enable-embedded-compiler"></a>[`ENABLE_EMBEDDED_COMPILER`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L5) | `ENABLE_LIBRARIES` | Set to TRUE to enable support for 'compile_expressions' option for query execution | |
| <a name="enable-fastops"></a>[`ENABLE_FASTOPS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/fastops.cmake#L2) | `ENABLE_LIBRARIES` | Enable fast vectorized mathematical functions library by Mikhail Parakhin | |
| <a name="enable-gperf"></a>[`ENABLE_GPERF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gperf.cmake#L5) | `ENABLE_LIBRARIES` | Use gperf function hash generator tool | |
| <a name="enable-grpc"></a>[`ENABLE_GRPC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L8) | `ENABLE_GRPC_DEFAULT` | Use gRPC | |
| <a name="enable-gsasl-library"></a>[`ENABLE_GSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L1) | `ENABLE_LIBRARIES` | Enable gsasl library | |
| <a name="enable-h"></a>[`ENABLE_H3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L1) | `ENABLE_LIBRARIES` | Enable H3 | |
| <a name="enable-hdfs"></a>[`ENABLE_HDFS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L2) | `ENABLE_LIBRARIES` | Enable HDFS | |
| <a name="enable-icu"></a>[`ENABLE_ICU`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L2) | `ENABLE_LIBRARIES` | Enable ICU | |
| <a name="enable-ldap"></a>[`ENABLE_LDAP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L5) | `ENABLE_LIBRARIES` | Enable LDAP | |
| <a name="enable-libpqxx"></a>[`ENABLE_LIBPQXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libpqxx.cmake#L1) | `ENABLE_LIBRARIES` | Enable libpqxx | |
| <a name="enable-msgpack"></a>[`ENABLE_MSGPACK`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L1) | `ENABLE_LIBRARIES` | Enable msgpack library | |
| <a name="enable-mysql"></a>[`ENABLE_MYSQL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L2) | `ENABLE_LIBRARIES` | Enable MySQL | |
| <a name="enable-nuraft"></a>[`ENABLE_NURAFT`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/nuraft.cmake#L1) | `ENABLE_LIBRARIES` | Enable NuRaft | |
| <a name="enable-odbc"></a>[`ENABLE_ODBC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ODBC library | |
| <a name="enable-orc"></a>[`ENABLE_ORC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ORC | |
| <a name="enable-parquet"></a>[`ENABLE_PARQUET`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L2) | `ENABLE_LIBRARIES` | Enable parquet | |
| <a name="enable-protobuf"></a>[`ENABLE_PROTOBUF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L1) | `ENABLE_LIBRARIES` | Enable protobuf | |
| <a name="enable-rapidjson"></a>[`ENABLE_RAPIDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L1) | `ENABLE_LIBRARIES` | Use rapidjson | |
| <a name="enable-rdkafka"></a>[`ENABLE_RDKAFKA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L1) | `ENABLE_LIBRARIES` | Enable kafka | |
| <a name="enable-rocksdb"></a>[`ENABLE_ROCKSDB`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L1) | `ENABLE_LIBRARIES` | Enable ROCKSDB | |
| <a name="enable-s"></a>[`ENABLE_S3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L2) | `ENABLE_LIBRARIES` | Enable S3 | |
| <a name="enable-ssl"></a>[`ENABLE_SSL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L3) | `ENABLE_LIBRARIES` | Enable ssl | Needed when securely connecting to an external server, e.g. clickhouse-client --host ... --secure |
| <a name="enable-stats"></a>[`ENABLE_STATS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/stats.cmake#L1) | `ENABLE_LIBRARIES` | Enable StatsLib library | |
### External libraries system/bundled mode
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="use-internal-avro-library"></a>[`USE_INTERNAL_AVRO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L11) | `ON` | Set to FALSE to use system avro library instead of bundled | |
| <a name="use-internal-aws-s-library"></a>[`USE_INTERNAL_AWS_S3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L14) | `ON` | Set to FALSE to use system S3 instead of bundled (experimental; set to OFF at your own risk) | |
| <a name="use-internal-brotli-library"></a>[`USE_INTERNAL_BROTLI_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libbrotli library instead of bundled | Many systems ship only dynamic brotli libraries, so we fall back to bundled by default |
| <a name="use-internal-capnp-library"></a>[`USE_INTERNAL_CAPNP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system capnproto library instead of bundled | |
| <a name="use-internal-curl"></a>[`USE_INTERNAL_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L10) | `NOT_UNBUNDLED` | Use internal curl library | |
| <a name="use-internal-grpc-library"></a>[`USE_INTERNAL_GRPC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L25) | `NOT_UNBUNDLED` | Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal gRPC framework. You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. The external gRPC framework can be installed in the system by running sudo apt-get install libgrpc++-dev protobuf-compiler-grpc |
| <a name="use-internal-gtest-library"></a>[`USE_INTERNAL_GTEST_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gtest.cmake#L3) | `NOT_UNBUNDLED` | Set to FALSE to use system Google Test instead of bundled | |
| <a name="use-internal-h-library"></a>[`USE_INTERNAL_H3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L9) | `ON` | Set to FALSE to use system h3 library instead of bundled | |
| <a name="use-internal-hdfs-library"></a>[`USE_INTERNAL_HDFS3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L14) | `ON` | Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk) | |
| <a name="use-internal-icu-library"></a>[`USE_INTERNAL_ICU_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system ICU library instead of bundled | |
| <a name="use-internal-ldap-library"></a>[`USE_INTERNAL_LDAP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system *LDAP library instead of bundled | |
| <a name="use-internal-libcxx-library"></a>[`USE_INTERNAL_LIBCXX_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L15) | `USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT` | Disable to use system libcxx and libcxxabi libraries instead of bundled | |
| <a name="use-internal-libgsasl-library"></a>[`USE_INTERNAL_LIBGSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libgsasl library instead of bundled | when USE_STATIC_LIBRARIES we usually need to pick up hell a lot of dependencies for libgsasl |
| <a name="use-internal-libxml-library"></a>[`USE_INTERNAL_LIBXML2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libxml2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system libxml2 library instead of bundled | |
| <a name="use-internal-llvm-library"></a>[`USE_INTERNAL_LLVM_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L8) | `NOT_UNBUNDLED` | Use bundled or system LLVM library. | |
| <a name="use-internal-msgpack-library"></a>[`USE_INTERNAL_MSGPACK_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system msgpack library instead of bundled | |
| <a name="use-internal-mysql-library"></a>[`USE_INTERNAL_MYSQL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system mysqlclient library instead of bundled | |
| <a name="use-internal-odbc-library"></a>[`USE_INTERNAL_ODBC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L22) | `NOT_UNBUNDLED` | Use internal ODBC library | |
| <a name="use-internal-orc-library"></a>[`USE_INTERNAL_ORC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L11) | `ON` | Set to FALSE to use system ORC instead of bundled (experimental; set to OFF at your own risk) | |
| <a name="use-internal-parquet-library"></a>[`USE_INTERNAL_PARQUET_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L16) | `NOT_UNBUNDLED` | Set to FALSE to use system parquet library instead of bundled | |
| <a name="use-internal-poco-library"></a>[`USE_INTERNAL_POCO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/poco.cmake#L1) | `ON` | Use internal Poco library | |
| <a name="use-internal-protobuf-library"></a>[`USE_INTERNAL_PROTOBUF_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal protobuf library. You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. The external protobuf library can be installed in the system by running sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev |
| <a name="use-internal-rapidjson-library"></a>[`USE_INTERNAL_RAPIDJSON_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L9) | `NOT_UNBUNDLED` | Set to FALSE to use system rapidjson library instead of bundled | |
| <a name="use-internal-rdkafka-library"></a>[`USE_INTERNAL_RDKAFKA_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system librdkafka instead of the bundled | |
| <a name="use-internal-re-library"></a>[`USE_INTERNAL_RE2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/re2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system re2 library instead of bundled [slower] | |
| <a name="use-internal-rocksdb-library"></a>[`USE_INTERNAL_ROCKSDB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system ROCKSDB library instead of bundled | |
| <a name="use-internal-snappy-library"></a>[`USE_INTERNAL_SNAPPY_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system snappy library instead of bundled | |
| <a name="use-internal-sparsehash-library"></a>[`USE_INTERNAL_SPARSEHASH_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sparsehash.cmake#L1) | `ON` | Set to FALSE to use system sparsehash library instead of bundled | |
| <a name="use-internal-ssl-library"></a>[`USE_INTERNAL_SSL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L12) | `NOT_UNBUNDLED` | Set to FALSE to use system *ssl library instead of bundled | |
| <a name="use-internal-zlib-library"></a>[`USE_INTERNAL_ZLIB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zlib.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zlib library instead of bundled | |
| <a name="use-internal-zstd-library"></a>[`USE_INTERNAL_ZSTD_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zstd.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zstd library instead of bundled | |
### Other flags
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="add-gdb-index-for-gold"></a>[`ADD_GDB_INDEX_FOR_GOLD`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L195) | `OFF` | Add .gdb-index to resulting binaries for gold linker. | Ignored if `lld` is used |
| <a name="arch-native"></a>[`ARCH_NATIVE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L248) | `OFF` | Add -march=native compiler flag | |
| <a name="clickhouse-split-binary"></a>[`CLICKHOUSE_SPLIT_BINARY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L98) | `OFF` | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
| <a name="compiler-pipe"></a>[`COMPILER_PIPE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L235) | `ON` | -pipe compiler option | Less `/tmp` usage, more RAM usage. |
| <a name="enable-check-heavy-builds"></a>[`ENABLE_CHECK_HEAVY_BUILDS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L69) | `OFF` | Don't allow C++ translation units to compile too long or to take too much memory while compiling | |
| <a name="enable-fuzzing"></a>[`ENABLE_FUZZING`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L115) | `OFF` | Fuzzy testing using libfuzzer | Implies `WITH_COVERAGE` |
| <a name="enable-libraries"></a>[`ENABLE_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L357) | `ON` | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
| <a name="enable-multitarget-code"></a>[`ENABLE_MULTITARGET_CODE`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L100) | `ON` | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. `ifdef ENABLE_MULTITARGET`). If turned ON, this option defines such macro. See `src/Functions/TargetSpecific.h` |
| <a name="enable-tests"></a>[`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L154) | `ON` | Provide unit_test_dbms target with Google.Test unit tests | If turned `ON`, assumes the user has either the system GTest library or the bundled one. |
| <a name="enable-thinlto"></a>[`ENABLE_THINLTO`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L313) | `ON` | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
| <a name="fail-on-unsupported-options-combination"></a>[`FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32) | `ON` | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
| <a name="glibc-compatibility"></a>[`GLIBC_COMPATIBILITY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L159) | `ON` | Enable compatibility with older glibc libraries. | Only for Linux, x86_64. Implies `ENABLE_FASTMEMCPY` |
| <a name="linker-name"></a>[`LINKER_NAME`](https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L44) | `OFF` | Linker name or full path | Example values: `lld-10`, `gold`. |
| <a name="llvm-has-rtti"></a>[`LLVM_HAS_RTTI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L40) | `ON` | Enable if LLVM was built with RTTI enabled | |
| <a name="make-static-libraries"></a>[`MAKE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L91) | `USE_STATIC_LIBRARIES` | Disable to make shared libraries | |
| <a name="parallel-compile-jobs"></a>[`PARALLEL_COMPILE_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10) | `""` | Maximum number of concurrent compilation jobs | 1 if not set |
| <a name="parallel-link-jobs"></a>[`PARALLEL_LINK_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13) | `""` | Maximum number of concurrent link jobs | 1 if not set |
| <a name="sanitize"></a>[`SANITIZE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7) | `""` | Enable one of the code sanitizers | Possible values: - `address` (ASan) - `memory` (MSan) - `thread` (TSan) - `undefined` (UBSan) - "" (no sanitizing) |
| <a name="split-shared-libraries"></a>[`SPLIT_SHARED_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L96) | `OFF` | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
| <a name="strip-debug-symbols-functions"></a>[`STRIP_DEBUG_SYMBOLS_FUNCTIONS`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L49) | `STRIP_DSF_DEFAULT` | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables). |
| <a name="unbundled"></a>[`UNBUNDLED`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L363) | `OFF` | Use system libraries instead of ones in contrib/ | We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your system. This mode exists for enthusiastic developers who are searching for trouble. Useful for maintainers of OS packages. |
| <a name="use-include-what-you-use"></a>[`USE_INCLUDE_WHAT_YOU_USE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L418) | `OFF` | Automatically reduce unneeded includes in source code (external tool) | https://github.com/include-what-you-use/include-what-you-use |
| <a name="use-libcxx"></a>[`USE_LIBCXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L1) | `NOT_UNBUNDLED` | Use libc++ and libc++abi instead of libstdc++ | |
| <a name="use-sentry"></a>[`USE_SENTRY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sentry.cmake#L13) | `ENABLE_LIBRARIES` | Use Sentry | |
| <a name="use-simdjson"></a>[`USE_SIMDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/simdjson.cmake#L1) | `ENABLE_LIBRARIES` | Use simdjson | |
| <a name="use-snappy"></a>[`USE_SNAPPY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L1) | `ENABLE_LIBRARIES` | Enable snappy library | |
| <a name="use-static-libraries"></a>[`USE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L90) | `ON` | Disable to use shared libraries | |
| <a name="use-unwind"></a>[`USE_UNWIND`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/unwind.cmake#L1) | `ENABLE_LIBRARIES` | Enable libunwind (better stacktraces) | |
| <a name="werror"></a>[`WERROR`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L373) | `OFF` | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
| <a name="weverything"></a>[`WEVERYTHING`](https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L22) | `ON` | Enable -Weverything option with some exceptions. | Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only |
| <a name="with-coverage"></a>[`WITH_COVERAGE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L274) | `OFF` | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
## Developer's guide for adding new CMake options
### Don't be obvious. Be informative.
Bad:
```cmake
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless as it neither gives the viewer any additional information nor explains the option's purpose.
Better:
```cmake
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some
pre-conditions, leave a comment above the `option()` line and explain what it does.
The best way would be linking the docs page (if it exists).
The comment is parsed into a separate column (see below).
Even better:
```cmake
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that they may be doing something wrong.
Also, such options should be disabled if applicable.
Bad:
```cmake
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```cmake
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables).
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment.
You may find that the option's name is self-descriptive.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```cmake
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```cmake
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
Other example, bad:
```cmake
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```cmake
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`.
Prefer the `ON/OFF` values, if possible.

View File

@ -52,7 +52,7 @@ Engines in the family:
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3)
- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3)
### Special Engines {#special-engines}

View File

@ -3,23 +3,23 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# S3 Table Engine {#table-engine-s3}
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar
to the [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs) engine, but provides S3-specific features.
This engine provides integration with the [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs) engine, but provides S3-specific features.
## Usage {#usage}
## Create Table {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
**Example:**
@ -47,6 +47,12 @@ SELECT * FROM s3_engine_table LIMIT 2
│ two │ 2 │
└──────┴───────┘
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Implementation Details {#implementation-details}
@ -56,9 +62,9 @@ SELECT * FROM s3_engine_table LIMIT 2
- Indexes.
- Replication.
**Globs in path**
## Wildcards In Path {#wildcards-in-path}
Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment).
The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file must exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` time).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
@ -67,80 +73,29 @@ Multiple path components can have globs. For being processed file should exist a
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
**Example**
1. Suppose we have several files in CSV format with the following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. There are several ways to make a table consisting of all six files:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Another way:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-related settings {#settings}
## S3-related Settings {#s3-settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size`Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size`Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects`Default value is `10`. Max number of HTTP redirects S3 hops allowed.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
- `s3_max_redirects` — Max number of S3 redirect hops allowed. Default value is `10`.
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
### Endpoint-based settings {#endpointsettings}
## Endpoint-based Settings {#endpoint-settings}
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
- `endpoint`Mandatory. Specifies prefix of an endpoint.
- `access_key_id` and `secret_access_key`Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials`Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header`Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
- `server_side_encryption_customer_key_base64`Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `endpoint` — Specifies prefix of an endpoint. Mandatory.
- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
Example:
**Example:**
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -152,5 +107,50 @@ Example:
</endpoint-name>
</s3>
```
## Usage {#usage-examples}
Suppose we have several files in CSV format with the following URIs on S3:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. There are several ways to make a table consisting of all six files:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Another way:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
4. Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
## See also
- [S3 table function](../../../sql-reference/table-functions/s3.md)
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -123,5 +123,6 @@ toc_title: Adopters
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
| <a href="https://www.tesla.com/" class="favicon">Tesla</a> | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse supports authenticating and managing users using external services.
The following external authenticators and directories are supported:
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
[Original article](https://clickhouse.tech/docs/en/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,115 @@
# Kerberos {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths. Those users may only use HTTP requests and must be able to authenticate using GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in ClickHouse config.
## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickhouse}
To enable Kerberos, one should include `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters:
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
- This parameter is optional, if omitted, the default principal will be used.
- `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
<!-- ... -->
<kerberos />
</yandex>
```
With principal specification:
```xml
<yandex>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
With filtering by realm:
```xml
<yandex>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! warning "Note"
You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
`principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication.
## Kerberos as an external authenticator for existing users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be *kerberized* (via GSS-SPNEGO mechanism).
Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **The *primary* part of the canonical principal name of the initiator is expected to match the kerberized user name for authentication to succeed**.
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
In order to enable Kerberos authentication for the user, specify `kerberos` section instead of `password` or similar sections in the user definition.
Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
<!-- ... -->
<users>
<!-- ... -->
<my_user>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! warning "Warning"
Note that Kerberos authentication cannot be used alongside any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shut down.
!!! info "Reminder"
Note that once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -1883,6 +1883,53 @@ Possible values:
Default value: `0`.
## insert_shard_id {#insert_shard_id}
If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md#distributed) table into which the data will be inserted synchronously.
If `insert_shard_id` value is incorrect, the server will throw an exception.
To get the number of shards on `requested_cluster`, you can check server config or use this query:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Possible values:
- 0 — Disabled.
- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
Default value: `0`.
**Example**
Query:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Result:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine.

View File

@ -9,7 +9,7 @@ Columns:
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has happened.
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
- `last_error_stacktrace` ([String](../../sql-reference/data-types/string.md)) — stacktrace for the last error.
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed queries).
**Example**
@ -25,3 +25,12 @@ LIMIT 1
│ CANNOT_OPEN_FILE │ 76 │ 1 │
└──────────────────┴──────┴───────┘
```
``` sql
WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all
SELECT name, arrayStringConcat(all, '\n') AS res
FROM system.errors
LIMIT 1
SETTINGS allow_introspection_functions=1\G
```

View File

@ -17,5 +17,3 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_limits) <!--hide-->

View File

@ -28,5 +28,3 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_usage) <!--hide-->

View File

@ -30,6 +30,4 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas_usage) <!--hide-->
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,16 +4,66 @@ toc_priority: 141
# deltaSum {#agg_functions-deltasum}
Syntax: `deltaSum(value)`
Sums the arithmetic difference between consecutive rows. If the difference is negative, it is ignored.
Adds the differences between consecutive rows. If the difference is negative, it is ignored.
`value` must be some integer or floating point type.
**Syntax**
Example:
```sql
select deltaSum(arrayJoin([1, 2, 3])); -- => 2
select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7
select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
``` sql
deltaSum(value)
```
**Arguments**
- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type.
**Returned value**
- A gained arithmetic difference of the `Integer` or `Float` type.
**Examples**
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Result:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## See Also {#see-also}
- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference)

View File

@ -36,4 +36,4 @@ For the default user limit the maximum execution time with half a second in 30 m
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -18,7 +18,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.

View File

@ -62,7 +62,7 @@ Note that materialized view is influenced by [optimize_on_insert](../../../opera
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There isnt a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view), although `DROP TABLE` works for views as well.
## Live View (Experimental) {#live-view}

View File

@ -3,17 +3,19 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# S3 Table Function {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Arguments**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
@ -22,14 +24,14 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
A table with the specified structure for reading or writing data in the specified file.
**Example**
**Examples**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Selecting the first two rows from the table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv`:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
``` text
@ -44,7 +46,7 @@ The similar but from file with `gzip` compression:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
LIMIT 2;
```
``` text
@ -54,33 +56,20 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Usage {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Suppose that we have several files with following URIs on S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Count the amount of rows in files ending with numbers from 1 to 3:
``` sql
SELECT count(*)
@ -93,9 +82,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Count the total amount of rows in all files in these two directories:
``` sql
SELECT count(*)
@ -108,17 +95,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +111,22 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Insert data into file `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Insert data into file `test-data.csv.gz` from existing table:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Maximum number of S3 redirect hops allowed.
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
- [S3 engine](../../engines/table-engines/integrations/s3.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -3,144 +3,92 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# Движок таблиц S3 {#table-engine-s3}
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Этот движок похож на
движок [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs), но предоставляет S3-специфичные функции.
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Он похож на движок [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности.
## Использование {#usage}
## Создание таблицы {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Параметры**
**Параметры движка**
- `path` — URL ссылающийся на файл расположенный в S3. В режиме для чтения можно читать несколько файлов как один, поддерживаются следующие шаблоны для указания маски пути к файлам: *, ?, {abc,def} и {N..M} где N, M — числа, `abc, def — строки.
- `format` — [Формат](../../../interfaces/formats.md#formats) файла.
- `structure`Структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Алгоритм сжатия, не обязятельный параметр. Поддерживаемые значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. По умолчанию, алгоритм сжатия будет автоматически применен в зависимости от расширения в имени файла.
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [ниже](#wildcards-in-path).
- `format` — [формат](../../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
**Пример:**
**Пример**
**1.** Создание таблицы `s3_engine_table` :
```sql
CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
SELECT * FROM s3_engine_table LIMIT 2;
```
**2.** Заполнение файла:
```sql
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Запрос данных:
```sql
SELECT * FROM s3_engine_table LIMIT 2
```
```text
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Виртуальные столбцы {#virtual-columns}
- `_path` — путь к файлу.
- `_file` — имя файла.
Подробнее про виртуальные столбцы можно прочитать [здесь](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Детали реализации {#implementation-details}
- Чтение и запись могут быть одновременными и паралельными
- Не поддерживается:
- `ALTER` и `SELECT...SAMPLE` операции.
- Индексы.
- Репликация.
- Чтение и запись могут быть параллельными.
- Не поддерживаются:
- запросы `ALTER` и `SELECT...SAMPLE`,
- индексы,
- репликация.
**Поддержка шаблонов в параметре path**
## Символы подстановки {#wildcards-in-path}
Множество частей параметра `path` поддерживает шаблоны. Для того чтобы быть обработанным файл должен присутствовать в S3 и соответсвовать шаблону. Списки файлов определяются в момент `SELECT` (но не в момент `CREATE`).
Аргумент `path` может указывать на несколько файлов, используя подстановочные знаки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
- `*`Заменяет любой количество любых символов кроме `/` включая пустые строки.
- `?`Заменяет один символ.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}`Заменяет любое числов в диапозоне от N до M включительно. N и M могут иметь лидирующие нули например `000..078`.
- `*` — заменяет любое количество любых символов, кроме `/`, включая пустую строку.
- `?` — заменяет любой одиночный символ.
- `{some_string, another_string, yet_another_one}` — заменяет любые строки `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — заменяет любое число от N до M, включая обе границы. N и M могут иметь ведущие нули, например `000..078`.
Конструкции с`{}` работают также как в табличной функции [remote](../../../sql-reference/table-functions/remote.md).
Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md).
## Настройки движка S3 {#s3-settings}
Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки:
- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64 Mб`.
- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512 Mб`.
- `s3_max_redirects` — максимальное количество разрешенных переадресаций S3. Значение по умолчанию — `10`.
Соображение безопасности: если злонамеренный пользователь может указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.
## Настройки конечных точек {#endpoint-settings}
Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
Обязательная настройка:
- `endpoint` — указывает префикс конечной точки.
Необязательные настройки:
- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой.
- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`.
- `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз.
- `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C.
**Пример**
1. Предположим у нас есть некоторые файлы в CSV формате со следующими URIs в S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. Есть несколько способов сделать таблицу состяющую из всех шести файлов:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Другой способ:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Таблица состоящая из всех файлах в обоих каталогах (все файлы должны удовлетворять формату и схеме описанными в запросе):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Предупреждение"
Если список файлов содержит диапозоны номеров с ведующими нулями, используйте конструкции со скобками для каждой цифры или используйте `?`.
**Пример**
Создание таблицы с именами файлов `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Виртуальные колонки {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**Смотри также**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-специфичные настройки {#settings}
Следующие настройки могут быть заданы при запуске запроса или установлены в конфигурационном файле для пользовательского профиля.
- `s3_max_single_part_upload_size` — По умолчанию `64Mb`. Максикальный размер куска данных для загрузки в S3 как singlepart.
- `s3_min_upload_part_size` — По умолчанию `512Mb`. Минимальный размер куска данных для загрузки в S3 с помощью [S3 Multipart загрузки](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Значение по умолчанию `10`. Максимально допустимое количество HTTP перенаправлений от серверов S3.
Примечания для безопасности: если злоумышленник может указать произвольные ссылки на S3, то лучше выставить `s3_max_redirects` как ноль для избежания атак типа [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) ; или ограничить с помощью `remote_host_filter` список адресов по которым возможно взаимодействие с S3.
### Настройки специфичные для заданной конечной точки {#endpointsettings}
Следующие настройки могут быть указаны в конфигурационном файле для заданной конечной точки (которой будет сопоставлен точный конечный префик URL):
- `endpoint` — Обязательный параметр. Указывает префикс URL для конечной точки.
- `access_key_id` и `secret_access_key`Не обязательно. Задает параметры авторизации для заданной конечной точки.
- `use_environment_credentials`Не обязательный параметр, значение по умолчанию `false`. Если установлено как `true`, S3 клиент будет пытаться получить параметры авторизации из переменных окружения и Amazon EC2 метаданных для заданной конечной точки.
- `header`Не обязательный параметр, может быть указан несколько раз. Добавляет указанный HTTP заголовок к запросу для заданной в `endpoint` URL префикса.
- `server_side_encryption_customer_key_base64`Не обязательный параметр. Если указан, к запросам будут указаны заголовки необходимые для доступа к S3 объектам с SSE-C шифрованием.
Пример:
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -153,3 +101,50 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
</s3>
```
## Примеры использования {#usage-examples}
Предположим, у нас есть несколько файлов в формате CSV со следующими URL-адресами в S3:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. Существует несколько способов создать таблицу, включающую в себя все шесть файлов:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Другой способ:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Таблица содержит все файлы в обоих каталогах (все файлы должны соответствовать формату и схеме, описанным в запросе):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
**Смотрите также**
- [Табличная функция S3](../../../sql-reference/table-functions/s3.md)
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse поддерживает аутентификацию и управл
Поддерживаются следующие внешние аутентификаторы и каталоги:
- [LDAP](./ldap.md#external-authenticators-ldap) [аутентификатор](./ldap.md#ldap-external-authenticator) и [каталог](./ldap.md#ldap-external-user-directory)
- Kerberos [аутентификатор](./kerberos.md#external-authenticators-kerberos)
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,118 @@
# Kerberos {#external-authenticators-kerberos}
ClickHouse предоставляет возможность аутентификации существующих (и правильно сконфигурированных) пользователей с использованием Kerberos.
В настоящее время возможно использование Kerberos только как внешнего аутентификатора, то есть для аутентификации уже существующих пользователей с помощью Kerberos. Пользователи, настроенные для Kerberos-аутентификации, могут работать с ClickHouse только через HTTP-интерфейс, причём сами клиенты должны иметь возможность аутентификации с использованием механизма GSS-SPNEGO.
!!! info "Внимание"
Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
## Настройка Kerberos в ClickHouse {#enabling-kerberos-in-clickhouse}
Для того, чтобы задействовать Kerberos-аутентификацию в ClickHouse, в первую очередь необходимо добавить одну-единственную секцию `kerberos` в `config.xml`.
В секции могут быть указаны дополнительные параметры:
- `principal` &mdash; задаёт имя принципала (canonical service principal name, SPN), используемое при авторизации ClickHouse на Kerberos-сервере.
- Это опциональный параметр, при его отсутствии будет использовано стандартное имя.
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации.
- Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет.
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<!-- ... -->
<kerberos />
</yandex>
```
Или, с указанием принципала:
```xml
<yandex>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! Warning "Важно"
В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей.
!!! Warning "Важно"
В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей.
## Аутентификация пользователей с помощью Kerberos {#kerberos-as-an-external-authenticator-for-existing-users}
Уже существующие пользователи могут воспользоваться аутентификацией с помощью Kerberos. Однако, Kerberos-аутентификация возможна только при использовании HTTP-интерфейса.
Имя принципала (principal name) обычно имеет вид:
- *primary/instance@REALM*
Для успешной аутентификации необходимо, чтобы *primary* совпало с именем пользователя ClickHouse, настроенного для использования Kerberos.
### Настройка Kerberos в `users.xml` {#enabling-kerberos-in-users-xml}
Для того, чтобы пользователь имел возможность производить аутентификацию с помощью Kerberos, достаточно включить секцию `kerberos` в описание пользователя в `users.xml` (например, вместо секции `password` или аналогичной ей).
В секции могут быть указаны дополнительные параметры:
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm): аутентификация будет возможна только при совпадении реалм клиента с указанным.
- Этот параметр является опциональным, при его отсутствии фильтрация применяться не будет.
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<!-- ... -->
<users>
<!-- ... -->
<my_user>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! Warning "Важно"
Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
!!! info ""
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
### Настройка Kerberos через SQL {#enabling-kerberos-using-sql}
Пользователей, использующих Kerberos-аутентификацию, можно создать не только с помощью изменения конфигурационных файлов.
Если SQL-ориентированное управление доступом включено в ClickHouse, можно также создать пользователя, работающего через Kerberos, с помощью SQL.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
Или, без фильтрации по реалм:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -29,6 +29,8 @@ toc_title: "Квоты"
<!-- Без ограничений. Просто считать соответствующие данные за указанный интервал. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
@ -48,6 +50,8 @@ toc_title: "Квоты"
<duration>3600</duration>
<queries>1000</queries>
<query_selects>100</query_selects>
<query_inserts>100</query_inserts>
<errors>100</errors>
<result_rows>1000000000</result_rows>
<read_rows>100000000000</read_rows>
@ -58,6 +62,8 @@ toc_title: "Квоты"
<duration>86400</duration>
<queries>10000</queries>
<query_selects>10000</query_selects>
<query_inserts>10000</query_inserts>
<errors>1000</errors>
<result_rows>5000000000</result_rows>
<read_rows>500000000000</read_rows>
@ -74,6 +80,10 @@ toc_title: "Квоты"
`queries` - общее количество запросов;
`query_selects` - общее количество запросов `SELECT`;
`query_inserts` - общее количество запросов `INSERT`;
`errors` - количество запросов, при выполнении которых было выкинуто исключение;
`result_rows` - суммарное количество строк, отданных в виде результата;

View File

@ -1759,6 +1759,54 @@ ClickHouse генерирует исключение
- [Движок Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
## insert_shard_id {#insert_shard_id}
Если не `0`, указывает, в какой шард [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы данные будут вставлены синхронно.
Если значение настройки `insert_shard_id` указано неверно, сервер выдаст ошибку.
Узнать количество шардов `shards_num` на кластере `requested_cluster` можно из конфигурации сервера, либо используя запрос:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Возможные значения:
- 0 — выключено.
- Любое число от `1` до `shards_num` соответствующей [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы.
Значение по умолчанию: `0`.
**Пример**
Запрос:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Результат:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## validate_polygons {#validate_polygons}
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.

View File

@ -4,16 +4,17 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — Интервал рандомизирован.
- `1` — Интервал не рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса, в секундах.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — интервал не рандомизирован.
- `1` — интервал рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `SELECT`.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `INSERT`.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса, в секундах.

View File

@ -4,27 +4,28 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key`([String](../../sql-reference/data-types/string.md)) — Значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время начала расчета потребления ресурсов.
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время окончания расчета потребления ресурс
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key`([String](../../sql-reference/data-types/string.md)) — значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,28 +4,31 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — Ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of source rows read from tables.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `INSERT` на этом интервале.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк, приведенных в результате.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -0,0 +1,69 @@
---
toc_priority: 141
---
# deltaSum {#agg_functions-deltasum}
Суммирует арифметическую разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована.
**Синтаксис**
``` sql
deltaSum(value)
```
**Аргументы**
- `value` — входные значения, должны быть типа [Integer](../../data-types/int-uint.md) или [Float](../../data-types/float.md).
**Возвращаемое значение**
- накопленная арифметическая разница, типа `Integer` или `Float`.
**Примеры**
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## Смотрите также {#see-also}
- [runningDifference](../../functions/other-functions.md#runningdifferencex)

View File

@ -14,14 +14,14 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -37,5 +37,4 @@ ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -13,13 +13,13 @@ toc_title: "Квота"
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -35,6 +35,4 @@ CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
<!--hide-->
```

View File

@ -60,5 +60,5 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.
Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.
Чтобы удалить представление, следует использовать [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Впрочем, `DROP TABLE` тоже работает для представлений.

View File

@ -23,15 +23,15 @@ toc_title: "Введение"
| Функция | Описание |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](../../sql-reference/table-functions/merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](../../sql-reference/table-functions/numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](../../sql-reference/table-functions/remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](../../sql-reference/table-functions/url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](../../sql-reference/table-functions/mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Создаёт таблицу с движком [PostgreSQL](../../engines/table-engines/integrations/postgresql.md). |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](../../sql-reference/table-functions/odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](../../sql-reference/table-functions/s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
| [file](file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [jdbc](jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/) <!--hide-->

View File

@ -3,35 +3,41 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# Табличная Функция S3 {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Предоставляет табличный интерфейс для выбора/вставки файлов в [Amazon S3](https://aws.amazon.com/s3/). Эта табличная функция похожа на [hdfs](../../sql-reference/table-functions/hdfs.md), но обеспечивает специфические для S3 возможности.
**Синтаксис**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Аргументы**
- `path`Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `format`The [format](../../interfaces/formats.md#formats) of the file.
- `structure`Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}`, где `N, M` — числа, `'abc', 'def'` — строки. Подробнее смотри [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — необязательный параметр. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. По умолчанию сжатие определяется автоматически по расширению файла.
**Returned value**
**Возвращаемые значения**
A table with the specified structure for reading or writing data in the specified file.
Таблица с указанной структурой для чтения или записи данных в указанный файл.
**Example**
**Примеры**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Создание таблицы из файла S3 `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` и выбор первых двух строк из нее:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -39,14 +45,18 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
The similar but from file with `gzip` compression:
То же самое, но файл со сжатием `gzip`:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -54,37 +64,24 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Примеры использования {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Предположим, у нас есть несколько файлов со следующими URI на S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Подсчитаем количество строк в файлах, заканчивающихся цифрами от 1 до 3:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -93,13 +90,11 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Подсчитаем общее количество строк во всех файлах этих двух каталогов:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32');
```
``` text
@ -108,17 +103,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +119,23 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Запишем данные в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Запишем данные из существующей таблицы в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
**Смотрите также**
- `_path` — Path to the file.
- `_file` — Name of the file.
- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md)
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -51,5 +51,5 @@ The easiest way to see the result is to use `--livereload=8888` argument of buil
At the moment there’s no easy way to do just that, but you can consider:
- To hit the “Watch” button on top of GitHub web interface to know as early as possible, even during pull request. Alternative to this is `#github-activity` channel of [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/enQtOTUzMjM4ODQwNTc5LWJmMjE3Yjc2YmI1ZDBlZmI4ZTc3OWY3ZTIwYTljYzY4MzBlODM3YzBjZTc1YmYyODRlZTJkYTgzYzBiNTA2Yjk).
- To hit the “Watch” button on top of GitHub web interface to know as early as possible, even during pull request. Alternative to this is `#github-activity` channel of [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA).
- Some search engines allow you to subscribe to changes of a specific website via email, and you can opt in to that for https://clickhouse.tech.

View File

@ -1,5 +1,5 @@
---
toc_folder_title: Interfaces
toc_folder_title: 接口
toc_priority: 14
toc_title: 客户端
---

View File

@ -17,7 +17,7 @@ toc_title: ClickHouse的特性
在一些列式数据库管理系统中(例如InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}

View File

@ -1,7 +1,5 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5BFC\u8A00"
toc_folder_title: 引言
toc_priority: 1
---

View File

@ -1,7 +1,7 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u53D1\u8A00"
toc_folder_title: "\u8BED\u53E5"
toc_priority: 31
---

View File

@ -1,4 +1,5 @@
---
toc_folder_title: 表函数
toc_priority: 34
toc_title: "\u5BFC\u8A00"
---

View File

@ -188,6 +188,7 @@ add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
add_subdirectory (git-import)
add_subdirectory (bash-completion)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)

View File

@ -0,0 +1 @@
add_subdirectory(completions)

View File

@ -0,0 +1,28 @@
# Determine the directory the bash completion scripts are installed into and
# store it in `out` (this is a macro, so `out` is visible to the caller).
#
# The default is /usr/share/bash-completion/completions. If pkg-config is found
# and reports a `completionsdir` for the bash-completion package, that value is
# used instead. Finally the /usr prefix is replaced with CMAKE_INSTALL_PREFIX.
macro(configure_bash_completion)
    set(out "/usr/share/bash-completion/completions")

    # find_program(<VAR> <name>): the result variable comes first, then the
    # program name to look for.
    find_program(PKG_CONFIG_BIN pkg-config)
    if (PKG_CONFIG_BIN)
        # OUTPUT_VARIABLE expects a variable *name*. A separate variable is used
        # so that the default is kept when pkg-config prints nothing (for
        # example when the bash-completion package is not installed).
        execute_process(
            COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion
            OUTPUT_VARIABLE pkg_config_completions_dir
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_QUIET
        )
        if (NOT "${pkg_config_completions_dir}" STREQUAL "")
            set(out "${pkg_config_completions_dir}")
        endif()
    endif()

    string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}")
    message(STATUS "bash_completion will be written to ${out}")
endmacro()
# Resolve the destination directory (sets `out`).
configure_bash_completion()

# Completion scripts shipped with the package; all of them go to `out`.
set(bash_completion_files
    # set of functions
    clickhouse-bootstrap

    # binaries that accept settings as command line argument
    clickhouse-client
    clickhouse-local
    clickhouse-benchmark

    clickhouse
)
install(FILES ${bash_completion_files} DESTINATION ${out})

View File

@ -0,0 +1,43 @@
# Load the shared helpers from clickhouse-bootstrap (located next to this file)
# unless they are already available in the current shell.
#
# `[[ -v NAME ]]` takes a variable *name*; passing "$_CLICKHOUSE_COMPLETION_LOADED"
# tested the variable's value instead, so the flag was never really checked.
# The helper function is checked as well, because the flag alone does not
# guarantee that the functions are defined in this shell.
if [[ -z ${_CLICKHOUSE_COMPLETION_LOADED:-} ]] || ! declare -F _complete_clickhouse_generic > /dev/null; then
    source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
fi
# Print the sub-utilities offered by the given binary: runs "<binary> --help"
# and, for every line that starts with "clickhouse" and contains "args",
# outputs its second word.
function _clickhouse_get_utils()
{
    local binary=$1
    shift
    "$binary" --help 2>&1 | awk '/^clickhouse.*args/ { print $2 }'
}
# Completion handler for the multi-tool "clickhouse" entry point.
# The first word after the binary is completed from the list of utilities;
# after that, options are taken from "<binary> <utility> --help".
function _complete_for_clickhouse_entrypoint_bin()
{
    local cur prev cword words
    eval local binary="$( _clickhouse_quote "$1" )"
    _clickhouse_bin_exist "$binary" || return 0

    COMPREPLY=()
    _get_comp_words_by_ref cur prev cword words

    # Still typing the utility name: offer the utilities themselves.
    if [[ $cword -lt 2 ]]; then
        COMPREPLY=( $(compgen -W "$(_clickhouse_get_utils "$binary")" -- "$cur") )
        return
    fi

    local util="${words[1]}"
    case "$prev" in
        # Options that take a file name, and shell redirections (which bash
        # passes to the completion function even though the shell handles
        # them itself): produce no option candidates here.
        -C|--config-file|--config|'<'|'>'|'>>'|[12]'>'|[12]'>>')
            return
            ;;
    esac

    COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$binary" "$util")" -- "$cur") )
    return 0
}

_complete_clickhouse_generic clickhouse _complete_for_clickhouse_entrypoint_bin

View File

@ -0,0 +1,2 @@
# Load the shared helpers from clickhouse-bootstrap (located next to this file)
# unless they are already available in the current shell.
# `[[ -v NAME ]]` takes a variable *name*, so the flag is tested by value here,
# together with the presence of the helper function that is called below.
if [[ -z ${_CLICKHOUSE_COMPLETION_LOADED:-} ]] || ! declare -F _complete_clickhouse_generic > /dev/null; then
    source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
fi

# Options are extracted from "clickhouse-benchmark --help".
_complete_clickhouse_generic clickhouse-benchmark

View File

@ -0,0 +1,81 @@
#
# bash autocomplete, that can work with:
# a) --help of program
#
# Also you may like:
# $ bind "set completion-ignore-case on"
# $ bind "set show-all-if-ambiguous on"
#
# It uses bash-completion dynamic loader.
# Known to work with bash 3.* with programmable completion and extended
# pattern matching enabled (use 'shopt -s extglob progcomp' to enable
# these if they are not already enabled).
shopt -s extglob

# Marks the helpers below as loaded in *this* shell.
# Intentionally not exported: shell functions are not inherited by child
# processes, so an exported flag would claim "loaded" in shells where the
# functions do not exist.
_CLICKHOUSE_COMPLETION_LOADED=1
# Succeeds when $1 is an executable path or a command that the shell can resolve.
function _clickhouse_bin_exist()
{
    local candidate=$1
    if [[ -x $candidate ]]; then
        return 0
    fi
    command -v "$candidate" > /dev/null 2>&1
}
# Print $1 wrapped in single quotes, with every embedded single quote
# rewritten as '\'' so the result can be passed through eval.
function _clickhouse_quote()
{
    local escaped
    escaped=${1//\'/\'\\\'\'}
    printf '%s' "'${escaped}'"
}
# Extract every option (everything that starts with "-") from the --help dialog.
#
# Runs "$@ --help", splits each line on space, comma, '=', '<' and '>', and
# prints the unique fields that begin with "-" and are longer than one character.
#
# awk strings are indexed from 1: a start index of 0 in substr() is handled
# differently by different awk implementations, so index 1 is used to read the
# first character.
function _clickhouse_get_options()
{
    "$@" --help 2>&1 | awk -F '[ ,=<>]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
}
# Default completion handler: offers the options found in "<binary> --help".
function _complete_for_clickhouse_generic_bin()
{
    local cur prev
    eval local binary="$( _clickhouse_quote "$1" )"
    _clickhouse_bin_exist "$binary" || return 0

    COMPREPLY=()
    _get_comp_words_by_ref cur prev

    case "$prev" in
        # Options that take a file name, and shell redirections (which bash
        # passes to the completion function even though the shell handles
        # them itself): produce no option candidates here.
        -C|--config-file|--config|'<'|'>'|'>>'|[12]'>'|[12]'>>')
            return
            ;;
    esac

    COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$binary")" -- "$cur") )
    return 0
}
# Register a completion function for a binary.
#   $1 - command name to register the completion for
#   $2 - completion function (default: _complete_for_clickhouse_generic_bin)
function _complete_clickhouse_generic()
{
    local target=$1
    shift
    local handler=${1:-_complete_for_clickhouse_generic_bin}

    complete -o default -o bashdefault -o nospace -F "$handler" "$target"
}
# Make sure _get_comp_words_by_ref is available: if it is not defined yet and
# the bash-completion runtime file exists, source that file. Always returns 0.
function _complete_clickhouse_bootstrap_main()
{
    local bash_completion_lib=/usr/share/bash-completion/bash_completion

    type _get_comp_words_by_ref >& /dev/null && return 0

    if [[ -f $bash_completion_lib ]]; then
        source $bash_completion_lib
    fi
    return 0
}
_complete_clickhouse_bootstrap_main "$@"

View File

@ -0,0 +1,2 @@
# Load the shared helpers from clickhouse-bootstrap (located next to this file)
# unless they are already available in the current shell.
# `[[ -v NAME ]]` takes a variable *name*, so the flag is tested by value here,
# together with the presence of the helper function that is called below.
if [[ -z ${_CLICKHOUSE_COMPLETION_LOADED:-} ]] || ! declare -F _complete_clickhouse_generic > /dev/null; then
    source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
fi

# Options are extracted from "clickhouse-client --help".
_complete_clickhouse_generic clickhouse-client

View File

@ -0,0 +1,2 @@
# Load the shared helpers from clickhouse-bootstrap (located next to this file)
# unless they are already available in the current shell.
# `[[ -v NAME ]]` takes a variable *name*, so the flag is tested by value here,
# together with the presence of the helper function that is called below.
if [[ -z ${_CLICKHOUSE_COMPLETION_LOADED:-} ]] || ! declare -F _complete_clickhouse_generic > /dev/null; then
    source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
fi

# Options are extracted from "clickhouse-local --help".
_complete_clickhouse_generic clickhouse-local

View File

@ -63,7 +63,7 @@ enum class AccessType
M(ALTER_SETTINGS, "ALTER SETTING, ALTER MODIFY SETTING, MODIFY SETTING", TABLE, ALTER_TABLE) /* allows to execute ALTER MODIFY SETTING */\
M(ALTER_MOVE_PARTITION, "ALTER MOVE PART, MOVE PARTITION, MOVE PART", TABLE, ALTER_TABLE) \
M(ALTER_FETCH_PARTITION, "FETCH PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \
\
M(ALTER_TABLE, "", GROUP, ALTER) \
\

View File

@ -413,7 +413,7 @@ public:
for (const Field & f : keys_to_keep_)
{
keys_to_keep.emplace(f.safeGet<NearestFieldType<T>>());
keys_to_keep.emplace(f.safeGet<T>());
}
}

View File

@ -255,7 +255,7 @@ public:
/// The constant value. It is valid even if the size of the column is 0.
template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
T getValue() const { return getField().safeGet<T>(); }
bool isCollationSupported() const override { return data->isCollationSupported(); }
};

View File

@ -107,7 +107,7 @@ public:
{
data.resize_fill(data.size() + length);
}
void insert(const Field & x) override { data.push_back(DB::get<NearestFieldType<T>>(x)); }
void insert(const Field & x) override { data.push_back(DB::get<T>(x)); }
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t n) override

View File

@ -261,7 +261,7 @@ public:
void insert(const Field & x) override
{
data.push_back(DB::get<NearestFieldType<T>>(x));
data.push_back(DB::get<T>(x));
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -52,7 +52,7 @@
M(RWLockActiveWriters, "Number of threads holding write lock in a table RWLock.") \
M(GlobalThread, "Number of threads in global thread pool.") \
M(GlobalThreadActive, "Number of threads in global thread pool running a task.") \
M(LocalThread, "Number of threads in local thread pools. Should be similar to GlobalThreadActive.") \
M(LocalThread, "Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool.") \
M(LocalThreadActive, "Number of threads in local thread pools running a task.") \
M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed.") \
M(TablesToDropQueueSize, "Number of dropped tables, that are waiting for background data removal.") \

View File

@ -560,7 +560,7 @@ namespace DB
{
namespace ErrorCodes
{
#define M(VALUE, NAME) extern const Value NAME = VALUE;
#define M(VALUE, NAME) extern const ErrorCode NAME = VALUE;
APPLY_FOR_ERROR_CODES(M)
#undef M
@ -587,7 +587,7 @@ namespace ErrorCodes
ErrorCode end() { return END + 1; }
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace)
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace)
{
if (error_code >= end())
{
@ -596,10 +596,10 @@ namespace ErrorCodes
error_code = end() - 1;
}
values[error_code].increment(remote, message, stacktrace);
values[error_code].increment(remote, message, trace);
}
void ErrorPairHolder::increment(bool remote, const std::string & message, const std::string & stacktrace)
void ErrorPairHolder::increment(bool remote, const std::string & message, const FramePointers & trace)
{
const auto now = std::chrono::system_clock::now();
@ -609,7 +609,7 @@ namespace ErrorCodes
++error.count;
error.message = message;
error.stacktrace = stacktrace;
error.trace = trace;
error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
}
ErrorPair ErrorPairHolder::get()

View File

@ -1,11 +1,12 @@
#pragma once
#include <stddef.h>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <mutex>
#include <common/types.h>
#include <string_view>
#include <vector>
#include <common/types.h>
/** Allows to count number of simultaneously happening error codes.
* See also Exception.cpp for incrementing part.
@ -19,6 +20,7 @@ namespace ErrorCodes
/// ErrorCode identifier (index in array).
using ErrorCode = int;
using Value = size_t;
using FramePointers = std::vector<void *>;
/// Get name of error_code by identifier.
/// Returns statically allocated string.
@ -33,7 +35,7 @@ namespace ErrorCodes
/// Message for the last error.
std::string message;
/// Stacktrace for the last error.
std::string stacktrace;
FramePointers trace;
};
struct ErrorPair
{
@ -46,7 +48,7 @@ namespace ErrorCodes
{
public:
ErrorPair get();
void increment(bool remote, const std::string & message, const std::string & stacktrace);
void increment(bool remote, const std::string & message, const FramePointers & trace);
private:
ErrorPair value;
@ -60,7 +62,7 @@ namespace ErrorCodes
ErrorCode end();
/// Add value for specified error_code.
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace);
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace);
}
}

View File

@ -36,7 +36,7 @@ namespace ErrorCodes
/// - Aborts the process if error code is LOGICAL_ERROR.
/// - Increments error codes statistics.
void handle_error_code([[maybe_unused]] const std::string & msg, const std::string & stacktrace, int code, bool remote)
void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
{
// In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
// Log the message before we fail.
@ -47,20 +47,21 @@ void handle_error_code([[maybe_unused]] const std::string & msg, const std::stri
abort();
}
#endif
ErrorCodes::increment(code, remote, msg, stacktrace);
ErrorCodes::increment(code, remote, msg, trace);
}
Exception::Exception(const std::string & msg, int code, bool remote_)
: Poco::Exception(msg, code)
, remote(remote_)
{
handle_error_code(msg, getStackTraceString(), code, remote);
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(const std::string & msg, const Exception & nested, int code)
: Poco::Exception(msg, nested, code)
{
handle_error_code(msg, getStackTraceString(), code, remote);
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
@ -101,6 +102,31 @@ std::string Exception::getStackTraceString() const
#endif
}
/// Returns a copy of the stack frame addresses recorded for this exception.
/// With STD_EXCEPTION_HAS_STACK_TRACE the frames come from
/// get_stack_trace_size() / get_stack_trace_frames(); otherwise they are taken
/// from the `trace` member, covering indices [getOffset(), getSize()).
Exception::FramePointers Exception::getStackFramePointers() const
{
    FramePointers result;
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
    {
        result.resize(get_stack_trace_size());
        size_t idx = 0;
        for (auto & frame : result)
            frame = get_stack_trace_frames()[idx++];
    }
#else
    {
        const size_t frames_end = trace.getSize();
        const size_t frames_begin = trace.getOffset();
        result.reserve(frames_end - frames_begin);
        for (size_t idx = frames_begin; idx < frames_end; ++idx)
            result.push_back(trace.getFramePointers()[idx]);
    }
#endif
    return result;
}
void throwFromErrno(const std::string & s, int code, int the_errno)
{

View File

@ -24,6 +24,8 @@ namespace DB
class Exception : public Poco::Exception
{
public:
using FramePointers = std::vector<void *>;
Exception() = default;
Exception(const std::string & msg, int code, bool remote_ = false);
Exception(const std::string & msg, const Exception & nested, int code);
@ -66,6 +68,8 @@ public:
bool isRemoteException() const { return remote; }
std::string getStackTraceString() const;
/// Used for system.errors
FramePointers getStackFramePointers() const;
private:
#ifndef STD_EXCEPTION_HAS_STACK_TRACE

View File

@ -14,7 +14,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2

View File

@ -13,7 +13,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2

View File

@ -399,10 +399,10 @@ public:
template <typename T>
T & get();
NearestFieldType<std::decay_t<T>> & get();
template <typename T>
const T & get() const
const auto & get() const
{
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->get<T>();
@ -436,21 +436,10 @@ public:
return true;
}
template <typename T> T & safeGet()
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet() const
{ return const_cast<Field *>(this)->safeGet<T>(); }
template <typename T> const T & safeGet() const
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet();
bool operator< (const Field & rhs) const
{
@ -778,22 +767,40 @@ inline constexpr bool isInt64FieldType(Field::Types::Which t)
// Field value getter with type checking in debug builds.
template <typename T>
T & Field::get()
NearestFieldType<std::decay_t<T>> & Field::get()
{
using ValueType = std::decay_t<T>;
// Before storing the value in the Field, we static_cast it to the field
// storage type, so here we return the value of storage type as well.
// Otherwise, it is easy to make a mistake of reinterpret_casting the stored
// value to a different and incompatible type.
// For example, a Float32 value is stored as Float64, and it is incorrect to
// return a reference to this value as Float32.
using StoredType = NearestFieldType<std::decay_t<T>>;
#ifndef NDEBUG
// Disregard signedness when converting between int64 types.
constexpr Field::Types::Which target = TypeToEnum<NearestFieldType<ValueType>>::value;
constexpr Field::Types::Which target = TypeToEnum<StoredType>::value;
if (target != which
&& (!isInt64FieldType(target) || !isInt64FieldType(which)))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", Types::toString(which), Types::toString(target));
#endif
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
StoredType * MAY_ALIAS ptr = reinterpret_cast<StoredType *>(&storage);
return *ptr;
}
template <typename T>
auto & Field::safeGet()
{
const Types::Which requested = TypeToEnum<NearestFieldType<std::decay_t<T>>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T>
T & Field::reinterpret()
{

View File

@ -220,8 +220,8 @@ class IColumn;
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
\
/** Settings for testing hedged requests */ \
M(Int64, sleep_in_send_tables_status, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Int64, sleep_in_send_data, 0, "Time to sleep in sending data in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
\
M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \

View File

@ -105,7 +105,7 @@ DataTypeEnum<Type>::DataTypeEnum(const Values & values_) : values{values_}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const FieldType x = get<NearestFieldType<FieldType>>(field);
const FieldType x = get<FieldType>(field);
writeBinary(x, ostr);
}
@ -405,7 +405,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
const String & field_name = name_literal->value.get<String>();
const auto value = value_literal->value.get<NearestFieldType<FieldType>>();
const auto value = value_literal->value.get<FieldType>();
if (value > std::numeric_limits<FieldType>::max() || value < std::numeric_limits<FieldType>::min())
throw Exception{"Value " + toString(value) + " for element '" + field_name + "' exceeds range of " + EnumName<FieldType>::value,

View File

@ -152,7 +152,7 @@ template <typename T>
void DataTypeNumberBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
/// ColumnVector<T>::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64
typename ColumnVector<T>::ValueType x = get<NearestFieldType<FieldType>>(field);
typename ColumnVector<T>::ValueType x = get<FieldType>(field);
writeBinary(x, ostr);
}

View File

@ -338,7 +338,7 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated()
template <typename T>
void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -450,7 +450,7 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const
}
}
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -370,7 +370,7 @@ void FlatDictionary::calculateBytesAllocated()
template <typename T>
void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
const auto & null_value_ref = std::get<T>(attribute.null_values);
attribute.arrays.emplace<ContainerType<T>>(initial_array_size, null_value_ref);
}
@ -478,7 +478,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
}
}
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -451,7 +451,7 @@ void HashedDictionary::calculateBytesAllocated()
template <typename T>
void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
if (!sparse)
attribute.maps = std::make_unique<CollectionType<T>>();
else
@ -565,7 +565,7 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
}
}
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -595,7 +595,7 @@ void IPAddressDictionary::calculateBytesAllocated()
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<NearestFieldType<T>>());
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -786,7 +786,7 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field &
}
else
{
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, value.get<AttributeType>());
}
};

View File

@ -350,7 +350,7 @@ void RangeHashedDictionary::calculateBytesAllocated()
template <typename T>
void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps = std::make_unique<Collection<T>>();
}
@ -458,7 +458,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
value_to_insert = Value<ValueType>{ range, { value.get<ValueType>() }};
}
}

View File

@ -209,7 +209,13 @@ void DiskCacheWrapper::clearDirectory(const String & path)
/// Moves a directory on the cache disk (only if `from_path` exists there) and
/// then on the decorated disk.
void DiskCacheWrapper::moveDirectory(const String & from_path, const String & to_path)
{
    if (cache_disk->exists(from_path))
    {
        /// A previous, failed move attempt may have left a non-empty destination
        /// directory behind; clear it before moving.
        const bool destination_is_directory = cache_disk->exists(to_path) && cache_disk->isDirectory(to_path);
        if (destination_is_directory)
            cache_disk->clearDirectory(to_path);

        cache_disk->moveDirectory(from_path, to_path);
    }

    DiskDecorator::moveDirectory(from_path, to_path);
}

View File

@ -40,7 +40,8 @@ struct AddSecondsImpl
{
static constexpr auto name = "addSeconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta, t.fractional};
}
@ -60,7 +61,8 @@ struct AddMinutesImpl
{
static constexpr auto name = "addMinutes";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta * 60, t.fractional};
}
@ -80,7 +82,8 @@ struct AddHoursImpl
{
static constexpr auto name = "addHours";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta * 3600, t.fractional};
}
@ -99,7 +102,8 @@ struct AddDaysImpl
{
static constexpr auto name = "addDays";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addDays(t.whole, delta), t.fractional};
}
@ -119,7 +123,8 @@ struct AddWeeksImpl
{
static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addWeeks(t.whole, delta), t.fractional};
}
@ -139,7 +144,8 @@ struct AddMonthsImpl
{
static constexpr auto name = "addMonths";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addMonths(t.whole, delta), t.fractional};
}
@ -159,7 +165,8 @@ struct AddQuartersImpl
{
static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addQuarters(t.whole, delta), t.fractional};
}
@ -179,7 +186,8 @@ struct AddYearsImpl
{
static constexpr auto name = "addYears";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addYears(t.whole, delta), t.fractional};
}
@ -265,14 +273,16 @@ struct Adder
private:
template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], delta.getData()[i], time_zone);
}
template <typename FromType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(from, delta.getData()[i], time_zone);

View File

@ -2798,12 +2798,16 @@ private:
auto & out_data = static_cast<typename EnumType::ColumnType &>(*res).getData();
out_data.resize(size);
auto default_enum_value = result_type.getValues().front().second;
if (nullable_col)
{
for (const auto i : ext::range(0, size))
{
if (!nullable_col->isNullAt(i))
out_data[i] = result_type.getValue(col->getDataAt(i));
else
out_data[i] = default_enum_value;
}
}
else

View File

@ -42,11 +42,11 @@ struct SimdJSONParser
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
@ -75,7 +75,7 @@ struct SimdJSONParser
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
ALWAYS_INLINE Iterator end() const { return array.end(); }
ALWAYS_INLINE size_t size() const { return array.size(); }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }
private:
simdjson::dom::array array;
@ -111,7 +111,7 @@ struct SimdJSONParser
if (x.error())
return false;
result = x.first;
result = x.value_unsafe();
return true;
}
@ -137,7 +137,7 @@ struct SimdJSONParser
if (document.error())
return false;
result = document.first;
result = document.value_unsafe();
return true;
}
@ -155,12 +155,12 @@ private:
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
return element.get_array().first;
return element.get_array().value_unsafe();
}
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
return element.get_object().first;
return element.get_object().value_unsafe();
}
}

View File

@ -49,8 +49,11 @@ public:
{}
template <typename ... Args>
inline auto execute(const DateTime64 & t, Args && ... args) const
inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const
{
/// Type conversion from float to integer may be required.
/// We are Ok with implementation specific result for out of range and denormals conversion.
if constexpr (TransformHasExecuteOverload_v<DateTime64, decltype(scale_multiplier), Args...>)
{
return wrapped_transform.execute(t, scale_multiplier, std::forward<Args>(args)...);

View File

@ -872,15 +872,18 @@ bool FunctionArrayElement::matchKeyToIndexNumberConst(
if (!data_numeric)
return false;
bool is_integer_field = Field::dispatch([](const auto & value)
std::optional<DataType> index_as_integer;
Field::dispatch([&](const auto & value)
{
return is_integer_v<std::decay_t<decltype(value)>>;
using FieldType = std::decay_t<decltype(value)>;
if constexpr (is_integer_v<FieldType> && std::is_convertible_v<FieldType, DataType>)
index_as_integer = static_cast<DataType>(value);
}, index);
if (!is_integer_field)
if (!index_as_integer)
return false;
MatcherNumberConst<DataType> matcher{data_numeric->getData(), get<DataType>(index)};
MatcherNumberConst<DataType> matcher{data_numeric->getData(), *index_as_integer};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
}

View File

@ -49,11 +49,10 @@ struct DivideIntegralByConstantImpl
#pragma GCC diagnostic ignored "-Wsign-compare"
/// Division by -1. By the way, we avoid FPE by division of the largest negative number by -1.
/// And signed integer overflow is well defined in C++20.
if (unlikely(is_signed_v<B> && b == -1))
{
for (size_t i = 0; i < size; ++i)
c_pos[i] = -a_pos[i];
c_pos[i] = -make_unsigned_t<A>(a_pos[i]); /// Avoid UBSan report in signed integer overflow.
return;
}

View File

@ -41,7 +41,8 @@ void registerFunctionThrowIf(FunctionFactory &);
void registerFunctionVersion(FunctionFactory &);
void registerFunctionBuildId(FunctionFactory &);
void registerFunctionUptime(FunctionFactory &);
void registerFunctionTimeZone(FunctionFactory &);
void registerFunctionTimezone(FunctionFactory &);
void registerFunctionTimezoneOf(FunctionFactory &);
void registerFunctionRunningAccumulate(FunctionFactory &);
void registerFunctionRunningDifference(FunctionFactory &);
void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
@ -111,7 +112,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionVersion(factory);
registerFunctionBuildId(factory);
registerFunctionUptime(factory);
registerFunctionTimeZone(factory);
registerFunctionTimezone(factory);
registerFunctionTimezoneOf(factory);
registerFunctionRunningAccumulate(factory);
registerFunctionRunningDifference(factory);
registerFunctionRunningDifferenceStartingWithFirstValue(factory);

View File

@ -12,13 +12,13 @@ namespace
/** Returns the server time zone.
*/
class FunctionTimeZone : public IFunction
class FunctionTimezone : public IFunction
{
public:
static constexpr auto name = "timezone";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionTimeZone>();
return std::make_shared<FunctionTimezone>();
}
String getName() const override
@ -45,9 +45,10 @@ public:
}
void registerFunctionTimeZone(FunctionFactory & factory)
void registerFunctionTimezone(FunctionFactory & factory)
{
factory.registerFunction<FunctionTimeZone>();
factory.registerFunction<FunctionTimezone>();
factory.registerAlias("timeZone", "timezone");
}
}

View File

@ -0,0 +1,118 @@
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime.h>
#include <common/DateLUTImpl.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
/** Executable part of timezoneOf(x): yields the name of the time zone attached to a DateTime / DateTime64 data type.
  * Example: Europe/Moscow.
  */
class ExecutableFunctionTimezoneOf : public IExecutableFunctionImpl
{
public:
    static constexpr auto name = "timezoneOf";

    String getName() const override { return name; }

    /// Opt out of the default handling of Nullable and LowCardinality arguments.
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

    /// Builds a constant String column of `input_rows_count` rows.
    /// Only the type of the first argument is inspected; its column data is never read.
    ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const DataTypePtr unwrapped_type = removeNullable(arguments[0].type);

        /// The reference form of dynamic_cast throws std::bad_cast if the type does not carry a time zone.
        const auto & timezone_holder = dynamic_cast<const TimezoneMixin &>(*unwrapped_type);

        return DataTypeString().createColumnConst(input_rows_count, timezone_holder.getTimeZone().getTimeZone());
    }
};
/// timezoneOf bound to a concrete list of argument types and a result type.
class BaseFunctionTimezoneOf : public IFunctionBaseImpl
{
public:
    static constexpr auto name = "timezoneOf";

    BaseFunctionTimezoneOf(DataTypes argument_types_, DataTypePtr return_type_)
        : argument_types(std::move(argument_types_))
        , return_type(std::move(return_type_))
    {
    }

    String getName() const override { return name; }

    bool isDeterministic() const override { return true; }
    bool isDeterministicInScopeOfQuery() const override { return true; }

    const DataTypes & getArgumentTypes() const override { return argument_types; }
    const DataTypePtr & getResultType() const override { return return_type; }

    /// The executable form keeps no state, so a fresh instance is created on every call.
    ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
    {
        return std::make_unique<ExecutableFunctionTimezoneOf>();
    }

    /// Returns a single-row constant String column holding the time zone name of the first argument's type.
    ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & arguments) const override
    {
        const DataTypePtr unwrapped_type = removeNullable(arguments[0].type);
        const auto & timezone_holder = dynamic_cast<const TimezoneMixin &>(*unwrapped_type);

        return DataTypeString().createColumnConst(1, timezone_holder.getTimeZone().getTimeZone());
    }

private:
    DataTypes argument_types;
    DataTypePtr return_type;
};
/// Overload resolver for timezoneOf: validates the single argument and builds the bound function.
class FunctionTimezoneOfBuilder : public IFunctionOverloadResolverImpl
{
public:
    static constexpr auto name = "timezoneOf";

    static FunctionOverloadResolverImplPtr create(const Context &)
    {
        return std::make_unique<FunctionTimezoneOfBuilder>();
    }

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 1; }

    /// Accepts DateTime or DateTime64 (possibly wrapped in Nullable) and always yields String.
    /// Any other argument type is rejected with BAD_ARGUMENTS.
    DataTypePtr getReturnType(const DataTypes & types) const override
    {
        const DataTypePtr unwrapped_type = removeNullable(types[0]);

        if (!isDateTime(unwrapped_type) && !isDateTime64(unwrapped_type))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad argument for function {}, should be DateTime or DateTime64", name);

        return std::make_shared<DataTypeString>();
    }

    /// Only the type of the first argument is passed on to the bound function.
    FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
    {
        DataTypes argument_types{arguments[0].type};
        return std::make_unique<BaseFunctionTimezoneOf>(argument_types, return_type);
    }

    /// Opt out of the default handling of Nullable and LowCardinality arguments.
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

    /// Argument 0 is reported as not implying a Nullable return type.
    ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override
    {
        return {0};
    }
};
}
/// Registers the timezoneOf function in the factory, together with the alternative spelling "timeZoneOf".
void registerFunctionTimezoneOf(FunctionFactory & factory)
{
factory.registerFunction<FunctionTimezoneOfBuilder>();
/// The alias is added after its target ("timezoneOf") has been registered.
factory.registerAlias("timeZoneOf", "timezoneOf");
}
}

View File

@ -21,11 +21,11 @@ namespace
{
/// Just changes time zone information for data type. The calculation is free.
class FunctionToTimeZone : public IFunction
class FunctionToTimezone : public IFunction
{
public:
static constexpr auto name = "toTimeZone";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimeZone>(); }
static constexpr auto name = "toTimezone";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimezone>(); }
String getName() const override
{
@ -64,7 +64,8 @@ public:
void registerFunctionToTimeZone(FunctionFactory & factory)
{
factory.registerFunction<FunctionToTimeZone>();
factory.registerFunction<FunctionToTimezone>();
factory.registerAlias("toTimeZone", "toTimezone");
}
}

View File

@ -467,6 +467,7 @@ SRCS(
timeSlot.cpp
timeSlots.cpp
timezone.cpp
timezoneOf.cpp
timezoneOffset.cpp
toColumnTypeName.cpp
toCustomWeek.cpp
@ -506,7 +507,7 @@ SRCS(
toStartOfTenMinutes.cpp
toStartOfYear.cpp
toTime.cpp
toTimeZone.cpp
toTimezone.cpp
toTypeName.cpp
toUnixTimestamp64Micro.cpp
toUnixTimestamp64Milli.cpp

View File

@ -21,6 +21,7 @@ namespace ProfileEvents
extern const Event S3WriteBytes;
}
namespace DB
{
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
@ -50,9 +51,9 @@ WriteBufferFromS3::WriteBufferFromS3(
, client_ptr(std::move(client_ptr_))
, minimum_upload_part_size(minimum_upload_part_size_)
, max_single_part_upload_size(max_single_part_upload_size_)
, temporary_buffer(Aws::MakeShared<Aws::StringStream>("temporary buffer"))
, last_part_size(0)
{ }
{
allocateBuffer();
}
void WriteBufferFromS3::nextImpl()
{
@ -72,11 +73,17 @@ void WriteBufferFromS3::nextImpl()
if (!multipart_upload_id.empty() && last_part_size > minimum_upload_part_size)
{
writePart();
last_part_size = 0;
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
allocateBuffer();
}
}
/// Replaces `temporary_buffer` with a fresh Aws::StringStream and resets `last_part_size` to zero.
void WriteBufferFromS3::allocateBuffer()
{
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
/// Set the exception mask so that the stream throws once badbit gets set
/// (standard iostream exceptions() semantics) rather than only recording the error state.
temporary_buffer->exceptions(std::ios::badbit);
last_part_size = 0;
}
void WriteBufferFromS3::finalize()
{
/// FIXME move final flush into the caller
@ -130,17 +137,26 @@ void WriteBufferFromS3::createMultipartUpload()
if (outcome.IsSuccess())
{
multipart_upload_id = outcome.GetResult().GetUploadId();
LOG_DEBUG(log, "Multipart upload has created. Upload id: {}", multipart_upload_id);
LOG_DEBUG(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id);
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
void WriteBufferFromS3::writePart()
{
if (temporary_buffer->tellp() <= 0)
auto size = temporary_buffer->tellp();
LOG_DEBUG(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Size: {}", bucket, key, multipart_upload_id, size);
if (size < 0)
throw Exception("Failed to write part. Buffer in invalid state.", ErrorCodes::S3_ERROR);
if (size == 0)
{
LOG_DEBUG(log, "Skipping writing part. Buffer is empty.");
return;
}
if (part_tags.size() == S3_WARN_MAX_PARTS)
{
@ -154,18 +170,16 @@ void WriteBufferFromS3::writePart()
req.SetKey(key);
req.SetPartNumber(part_tags.size() + 1);
req.SetUploadId(multipart_upload_id);
req.SetContentLength(temporary_buffer->tellp());
req.SetContentLength(size);
req.SetBody(temporary_buffer);
auto outcome = client_ptr->UploadPart(req);
LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, temporary_buffer->tellp());
if (outcome.IsSuccess())
{
auto etag = outcome.GetResult().GetETag();
part_tags.push_back(etag);
LOG_DEBUG(log, "Writing part finished. Total parts: {}, Upload_id: {}, Etag: {}", part_tags.size(), multipart_upload_id, etag);
LOG_DEBUG(log, "Writing part finished. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Parts: {}", bucket, key, multipart_upload_id, etag, part_tags.size());
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
@ -173,7 +187,10 @@ void WriteBufferFromS3::writePart()
void WriteBufferFromS3::completeMultipartUpload()
{
LOG_DEBUG(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}", bucket, key, multipart_upload_id);
LOG_DEBUG(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size());
if (part_tags.empty())
throw Exception("Failed to complete multipart upload. No parts have uploaded", ErrorCodes::S3_ERROR);
Aws::S3::Model::CompleteMultipartUploadRequest req;
req.SetBucket(bucket);
@ -192,22 +209,30 @@ void WriteBufferFromS3::completeMultipartUpload()
auto outcome = client_ptr->CompleteMultipartUpload(req);
if (outcome.IsSuccess())
LOG_DEBUG(log, "Multipart upload has completed. Upload_id: {}", multipart_upload_id);
LOG_DEBUG(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size());
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
void WriteBufferFromS3::makeSinglepartUpload()
{
if (temporary_buffer->tellp() <= 0)
return;
auto size = temporary_buffer->tellp();
LOG_DEBUG(log, "Making single part upload. Bucket: {}, Key: {}", bucket, key);
LOG_DEBUG(log, "Making single part upload. Bucket: {}, Key: {}, Size: {}", bucket, key, size);
if (size < 0)
throw Exception("Failed to make single part upload. Buffer in invalid state", ErrorCodes::S3_ERROR);
if (size == 0)
{
LOG_DEBUG(log, "Skipping single part upload. Buffer is empty.");
return;
}
Aws::S3::Model::PutObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);
req.SetContentLength(temporary_buffer->tellp());
req.SetContentLength(size);
req.SetBody(temporary_buffer);
if (object_metadata.has_value())
req.SetMetadata(object_metadata.value());
@ -215,7 +240,7 @@ void WriteBufferFromS3::makeSinglepartUpload()
auto outcome = client_ptr->PutObject(req);
if (outcome.IsSuccess())
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}", bucket, key);
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}", bucket, key, req.GetContentLength());
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}

View File

@ -69,6 +69,8 @@ public:
private:
bool finalized = false;
void allocateBuffer();
void createMultipartUpload();
void writePart();
void completeMultipartUpload();

View File

@ -41,8 +41,8 @@ String makeStringsEnum(const std::set<String> & values)
void changeIfArguments(ASTPtr & first, ASTPtr & second)
{
String first_value = first->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String second_value = second->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String first_value = first->as<ASTLiteral>()->value.get<String>();
String second_value = second->as<ASTLiteral>()->value.get<String>();
std::set<String> values;
values.insert(first_value);
@ -67,9 +67,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other)
{
std::set<String> values;
for (const auto & item : array_to->as<ASTLiteral>()->value.get<NearestFieldType<Array>>())
values.insert(item.get<NearestFieldType<String>>());
values.insert(other->as<ASTLiteral>()->value.get<NearestFieldType<String>>());
for (const auto & item : array_to->as<ASTLiteral>()->value.get<Array>())
values.insert(item.get<String>());
values.insert(other->as<ASTLiteral>()->value.get<String>());
String enum_string = makeStringsEnum(values);
@ -197,7 +197,7 @@ struct ConvertStringsToEnumMatcher
String(literal_other->value.getTypeName()) != "String")
return;
Array array_to = literal_to->value.get<NearestFieldType<Array>>();
Array array_to = literal_to->value.get<Array>();
if (array_to.size() == 0)
return;

View File

@ -818,12 +818,11 @@ private:
if (!min_id)
min_id = getMinIDToFinishLoading(forced_to_reload);
if (info->state_id >= min_id)
return true; /// stop
if (info->loading_id < min_id)
startLoading(*info, forced_to_reload, *min_id);
return false; /// wait for the next event
/// Wait for the next event if loading wasn't completed, or stop otherwise.
return (info->state_id >= min_id);
};
if (timeout == WAIT)
@ -848,12 +847,10 @@ private:
if (filter && !filter(name))
continue;
if (info.state_id >= min_id)
continue;
all_ready = false;
if (info.loading_id < min_id)
startLoading(info, forced_to_reload, *min_id);
all_ready &= (info.state_id >= min_id);
}
return all_ready;
};

View File

@ -299,7 +299,9 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
break;
}
case ASTAlterCommand::FREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::FREEZE_ALL:
case ASTAlterCommand::FREEZE_ALL: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_ALL:
{
required_access.emplace_back(AccessType::ALTER_FREEZE_PARTITION, database, table);
break;

View File

@ -290,8 +290,6 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
{
Blocks result;
// TODO: `node` may be always-false literal.
if (const auto * fn = node->as<ASTFunction>())
{
const auto dnf = analyzeFunction(fn, target_expr);
@ -350,6 +348,14 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
}
}
}
else if (const auto * literal = node->as<ASTLiteral>())
{
// Check if it's always true or false.
if (literal->value.getType() == Field::Types::UInt64 && literal->value.get<UInt64>() == 0)
return {result};
else
return {};
}
return {result};
}

View File

@ -271,6 +271,27 @@ void ASTAlterCommand::formatImpl(
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_PARTITION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
if (!with_name.empty())
{
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_ALL)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE";
if (!with_name.empty())
{
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::DELETE)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : "");
@ -368,7 +389,8 @@ bool ASTAlterQuery::isSettingsAlter() const
bool ASTAlterQuery::isFreezeAlter() const
{
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL);
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL)
|| isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_ALL);
}
/** Get the text that identifies this element. */

View File

@ -54,6 +54,8 @@ public:
FETCH_PARTITION,
FREEZE_PARTITION,
FREEZE_ALL,
UNFREEZE_PARTITION,
UNFREEZE_ALL,
DELETE,
UPDATE,
@ -153,7 +155,9 @@ public:
*/
String from;
/** For FREEZE PARTITION - place local backup to directory with specified name.
/**
* For FREEZE PARTITION - place local backup to directory with specified name.
* For UNFREEZE - delete local backup at directory with specified name.
*/
String with_name;

View File

@ -63,6 +63,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_fetch_partition("FETCH PARTITION");
ParserKeyword s_replace_partition("REPLACE PARTITION");
ParserKeyword s_freeze("FREEZE");
ParserKeyword s_unfreeze("UNFREEZE");
ParserKeyword s_partition("PARTITION");
ParserKeyword s_first("FIRST");
@ -454,6 +455,37 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
}
else if (s_unfreeze.ignore(pos, expected))
{
if (s_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
command->type = ASTAlterCommand::UNFREEZE_PARTITION;
}
else
{
command->type = ASTAlterCommand::UNFREEZE_ALL;
}
/// WITH NAME 'name' - remove the local backup stored in the directory with the specified name
if (s_with.ignore(pos, expected))
{
if (!s_name.ignore(pos, expected))
return false;
ASTPtr ast_with_name;
if (!parser_string_literal.parse(pos, ast_with_name, expected))
return false;
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
else
{
return false;
}
}
else if (s_modify_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))

View File

@ -722,11 +722,11 @@ void TCPHandler::processTablesStatusRequest()
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (settings.sleep_in_send_tables_status)
if (settings.sleep_in_send_tables_status_ms.totalMilliseconds())
{
out->next();
std::chrono::seconds sec(settings.sleep_in_send_tables_status);
std::this_thread::sleep_for(sec);
std::chrono::milliseconds ms(settings.sleep_in_send_tables_status_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
response.write(*out, client_tcp_protocol_version);
@ -1415,11 +1415,11 @@ void TCPHandler::sendData(const Block & block)
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (block.rows() > 0 && settings.sleep_in_send_data)
if (block.rows() > 0 && settings.sleep_in_send_data_ms.totalMilliseconds())
{
out->next();
std::chrono::seconds sec(settings.sleep_in_send_data);
std::this_thread::sleep_for(sec);
std::chrono::milliseconds ms(settings.sleep_in_send_data_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
state.block_out->write(block);

View File

@ -1013,7 +1013,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_
}
volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
relative_path = new_relative_path;
SyncGuardPtr sync_guard;
@ -1065,7 +1065,7 @@ void IMergeTreeDataPart::remove(bool keep_s3) const
try
{
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
}
catch (const Poco::FileNotFoundException &)
{

View File

@ -500,7 +500,7 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa
else
{
/// There is more than one DateTime column in partition key and we don't know which one to choose.
minmax_idx_time_column_pos = -1;
minmax_idx_time_column_pos = -1;
}
}
}
@ -1849,11 +1849,6 @@ void MergeTreeData::changeSettings(
}
}
PartitionCommandsResultInfo MergeTreeData::freezeAll(const String & with_name, const StorageMetadataPtr & metadata_snapshot, const Context & context, TableLockHolder &)
{
return freezePartitionsByMatcher([] (const DataPartPtr &) { return true; }, metadata_snapshot, with_name, context);
}
void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name)
{
old_and_new_names.push_back({old_name, new_name});
@ -2690,44 +2685,6 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part
}
}
PartitionCommandsResultInfo MergeTreeData::freezePartition(const ASTPtr & partition_ast, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder &)
{
std::optional<String> prefix;
String partition_id;
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
/// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
if (const auto * partition_lit = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>())
prefix = partition_lit->value.getType() == Field::Types::UInt64
? toString(partition_lit->value.get<UInt64>())
: partition_lit->value.safeGet<String>();
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
}
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
if (prefix)
LOG_DEBUG(log, "Freezing parts with prefix {}", *prefix);
else
LOG_DEBUG(log, "Freezing parts with partition ID {}", partition_id);
return freezePartitionsByMatcher(
[&prefix, &partition_id](const DataPartPtr & part)
{
if (prefix)
return startsWith(part->info.partition_id, *prefix);
else
return part->info.partition_id == partition_id;
},
metadata_snapshot,
with_name,
context);
}
void MergeTreeData::checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings) const
{
for (const auto & command : commands)
@ -2957,6 +2914,21 @@ Pipe MergeTreeData::alterPartition(
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezePartition(command.partition, command.with_name, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezeAll(command.with_name, query_context, lock);
}
break;
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
@ -3719,7 +3691,60 @@ MergeTreeData::PathsWithDisks MergeTreeData::getRelativeDataPathsWithDisks() con
return res;
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context)
/// Builds a predicate over partition IDs from the PARTITION clause of a query.
/// For the old (month-partitioning) format a literal partition value is treated as a prefix of partition IDs;
/// in every other case the partition ID computed from the query must match exactly.
MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & partition_ast, const Context & context) const
{
    if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        /// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
        if (const auto * partition_lit = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>())
        {
            String prefix = partition_lit->value.getType() == Field::Types::UInt64
                ? toString(partition_lit->value.get<UInt64>())
                : partition_lit->value.safeGet<String>();

            return [prefix](const String & partition_id) { return startsWith(partition_id, prefix); };
        }
    }

    String expected_id = getPartitionIDFromQuery(partition_ast, context);

    return [expected_id](const String & partition_id) { return expected_id == partition_id; };
}
/// Freezes the parts whose partition ID is accepted by the matcher built from `partition_ast`.
/// The backup name `with_name` and the metadata snapshot are passed through to freezePartitionsByMatcher.
/// The TableLockHolder argument is not used inside the body.
PartitionCommandsResultInfo MergeTreeData::freezePartition(
const ASTPtr & partition_ast,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context,
TableLockHolder &)
{
return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, context), metadata_snapshot, with_name, context);
}
/// Freezes parts of every partition: the matcher passed to freezePartitionsByMatcher accepts any partition ID.
/// The TableLockHolder argument is not used inside the body.
PartitionCommandsResultInfo MergeTreeData::freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder &)
{
return freezePartitionsByMatcher([] (const String &) { return true; }, metadata_snapshot, with_name, context);
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
MatcherFn matcher,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context)
{
String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString();
String default_shadow_path = clickhouse_path + "shadow/";
@ -3742,7 +3767,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
size_t parts_processed = 0;
for (const auto & part : data_parts)
{
if (!matcher(part))
if (!matcher(part->info.partition_id))
continue;
LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
@ -3772,6 +3797,70 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
return result;
}
/// Unfreezes, within the backup `backup_name`, the parts whose partition ID is accepted by the matcher built from `partition`.
/// The TableLockHolder argument is not used inside the body.
PartitionCommandsResultInfo MergeTreeData::unfreezePartition(
const ASTPtr & partition,
const String & backup_name,
const Context & context,
TableLockHolder &)
{
return unfreezePartitionsByMatcher(getPartitionMatcher(partition, context), backup_name, context);
}
/// Unfreezes parts of every partition within the backup `backup_name`:
/// the matcher passed to unfreezePartitionsByMatcher accepts any partition ID.
/// The TableLockHolder argument is not used inside the body.
PartitionCommandsResultInfo MergeTreeData::unfreezeAll(
const String & backup_name,
const Context & context,
TableLockHolder &)
{
return unfreezePartitionsByMatcher([] (const String &) { return true; }, backup_name, context);
}
/// Removes the frozen part directories of the backup `backup_name` whose partition ID is accepted by `matcher`.
/// The directory shadow/<escaped backup name>/<relative data path> is scanned on every disk of the storage policy;
/// disks where it does not exist are skipped. One result entry is returned per removed part directory.
/// The Context argument is not used inside the body.
PartitionCommandsResultInfo MergeTreeData::unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, const Context &)
{
    auto backup_path = std::filesystem::path("shadow") / escapeForFileName(backup_name) / relative_data_path;
    const auto backup_path_str = backup_path.generic_string();

    LOG_DEBUG(log, "Unfreezing parts by path {}", backup_path_str);

    PartitionCommandsResultInfo result;

    for (const auto & disk : getStoragePolicy()->getDisks())
    {
        if (disk->exists(backup_path))
        {
            for (auto dir_it = disk->iterateDirectory(backup_path); dir_it->isValid(); dir_it->next())
            {
                const auto & part_dir_name = dir_it->name();

                /// Partition ID is prefix of part directory name: <partition id>_<rest of part directory name>
                const auto separator_pos = part_dir_name.find('_');
                if (separator_pos == std::string::npos)
                    continue;

                auto partition_id = part_dir_name.substr(0, separator_pos);
                if (!matcher(partition_id))
                    continue;

                const auto & part_path = dir_it->path();
                disk->removeRecursive(part_path);

                /// Full path of the removed directory, reported both in the result and in the log.
                const auto removed_part_path = disk->getPath() + part_path;

                result.push_back(PartitionCommandResultInfo{
                    .partition_id = partition_id,
                    .part_name = part_dir_name,
                    .backup_path = disk->getPath() + backup_path_str,
                    .part_backup_path = removed_part_path,
                    .backup_name = backup_name,
                });

                LOG_DEBUG(log, "Unfreezed part by path {}", removed_part_path);
            }
        }
    }

    LOG_DEBUG(log, "Unfreezed {} parts", result.size());

    return result;
}
bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const
{
const auto settings = getSettings();

Some files were not shown because too many files have changed in this diff Show More