Merge branch 'master' into fix-ubsan-add-month

This commit is contained in:
Alexey Milovidov 2021-03-24 22:37:06 +03:00
commit f9b93e968b
113 changed files with 1872 additions and 870 deletions

View File

@ -4,14 +4,26 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
&& apt-get install \
apt-transport-https \
apt-utils \
ca-certificates \
dnsutils \
gnupg \
iputils-ping \
lsb-release \
wget \
--yes --no-install-recommends --verbose-versions \
&& cat /etc/resolv.conf \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& nslookup -debug apt.llvm.org \
&& ping -c1 apt.llvm.org \
&& wget -nv --retry-connrefused --tries=10 -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
# initial packages
@ -24,7 +36,10 @@ RUN apt-get update \
software-properties-common \
--yes --no-install-recommends
RUN apt-get update \
RUN cat /etc/resolv.conf \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& nslookup -debug apt.llvm.org \
&& apt-get update \
&& apt-get install \
bash \
cmake \

View File

@ -4,8 +4,9 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
&& apt-get install apt-utils ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
@ -31,7 +32,8 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
RUN apt-get update \
RUN echo "nameserver 1.1.1.1" >> /etc/resolv.conf \
&& apt-get update \
&& apt-get install \
clang-${LLVM_VERSION} \
debhelper \

View File

@ -26,6 +26,7 @@ def process_result(result_folder):
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
summary.append((test, "FAIL"))
status = 'failure'
else:
summary.append((test, "OK"))

View File

@ -11,7 +11,7 @@ service clickhouse-server start && sleep 5
cd /sqlancer/sqlancer-master
export TIMEOUT=60
export TIMEOUT=300
export NUM_QUERIES=1000
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err

View File

@ -0,0 +1,284 @@
# CMake in ClickHouse
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
```cmake
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake file types
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
4. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`).
## List of CMake flags
* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py).
* The flag name is a link to its position in the code.
* If an option's default value is itself an option, it's also a link to its position in this list.
### ClickHouse modes
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-clickhouse-all"></a>[`ENABLE_CLICKHOUSE_ALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L8) | `ON` | Enable all ClickHouse modes by default | The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
| <a name="enable-clickhouse-benchmark"></a>[`ENABLE_CLICKHOUSE_BENCHMARK`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L18) | `ENABLE_CLICKHOUSE_ALL` | Queries benchmarking mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/ |
| <a name="enable-clickhouse-client"></a>[`ENABLE_CLICKHOUSE_CLIENT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L11) | `ENABLE_CLICKHOUSE_ALL` | Client mode (interactive tui/shell that connects to the server) | |
| <a name="enable-clickhouse-compressor"></a>[`ENABLE_CLICKHOUSE_COMPRESSOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L23) | `ENABLE_CLICKHOUSE_ALL` | Data compressor and decompressor | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/ |
| <a name="enable-clickhouse-copier"></a>[`ENABLE_CLICKHOUSE_COPIER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L26) | `ENABLE_CLICKHOUSE_ALL` | Inter-cluster data copying mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/ |
| <a name="enable-clickhouse-extract-from-config"></a>[`ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20) | `ENABLE_CLICKHOUSE_ALL` | Configs processor (extract values etc.) | |
| <a name="enable-clickhouse-format"></a>[`ENABLE_CLICKHOUSE_FORMAT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28) | `ENABLE_CLICKHOUSE_ALL` | Queries pretty-printer and formatter with syntax highlighting | |
| <a name="enable-clickhouse-git-import"></a>[`ENABLE_CLICKHOUSE_GIT_IMPORT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L40) | `ENABLE_CLICKHOUSE_ALL` | A tool to analyze Git repositories | https://presentations.clickhouse.tech/matemarketing_2020/ |
| <a name="enable-clickhouse-install"></a>[`ENABLE_CLICKHOUSE_INSTALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L44) | `OFF` | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
| <a name="enable-clickhouse-local"></a>[`ENABLE_CLICKHOUSE_LOCAL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L15) | `ENABLE_CLICKHOUSE_ALL` | Local files fast processing mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/ |
| <a name="enable-clickhouse-obfuscator"></a>[`ENABLE_CLICKHOUSE_OBFUSCATOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L32) | `ENABLE_CLICKHOUSE_ALL` | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/ |
| <a name="enable-clickhouse-odbc-bridge"></a>[`ENABLE_CLICKHOUSE_ODBC_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L36) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to ODBC driver | https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/ |
| <a name="enable-clickhouse-server"></a>[`ENABLE_CLICKHOUSE_SERVER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10) | `ENABLE_CLICKHOUSE_ALL` | Server mode (main mode) | |
### External libraries
Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-amqpcpp"></a>[`ENABLE_AMQPCPP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/amqpcpp.cmake#L1) | `ENABLE_LIBRARIES` | Enable AMQP-CPP | |
| <a name="enable-avro"></a>[`ENABLE_AVRO`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L2) | `ENABLE_LIBRARIES` | Enable Avro | Needed when using Apache Avro serialization format |
| <a name="enable-base"></a>[`ENABLE_BASE64`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/base64.cmake#L1) | `ENABLE_LIBRARIES` | Enable base64 | |
| <a name="enable-brotli"></a>[`ENABLE_BROTLI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L1) | `ENABLE_LIBRARIES` | Enable brotli | |
| <a name="enable-capnp"></a>[`ENABLE_CAPNP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cap'n Proto | |
| <a name="enable-cassandra"></a>[`ENABLE_CASSANDRA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cassandra.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cassandra | |
| <a name="enable-ccache"></a>[`ENABLE_CCACHE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ccache.cmake#L22) | `ENABLE_CCACHE_BY_DEFAULT` | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
| <a name="enable-clang-tidy"></a>[`ENABLE_CLANG_TIDY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2) | `OFF` | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
| <a name="enable-curl"></a>[`ENABLE_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L1) | `ENABLE_LIBRARIES` | Enable curl | |
| <a name="enable-embedded-compiler"></a>[`ENABLE_EMBEDDED_COMPILER`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L5) | `ENABLE_LIBRARIES` | Set to TRUE to enable support for 'compile_expressions' option for query execution | |
| <a name="enable-fastops"></a>[`ENABLE_FASTOPS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/fastops.cmake#L2) | `ENABLE_LIBRARIES` | Enable fast vectorized mathematical functions library by Mikhail Parakhin | |
| <a name="enable-gperf"></a>[`ENABLE_GPERF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gperf.cmake#L5) | `ENABLE_LIBRARIES` | Use gperf function hash generator tool | |
| <a name="enable-grpc"></a>[`ENABLE_GRPC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L8) | `ENABLE_GRPC_DEFAULT` | Use gRPC | |
| <a name="enable-gsasl-library"></a>[`ENABLE_GSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L1) | `ENABLE_LIBRARIES` | Enable gsasl library | |
| <a name="enable-h"></a>[`ENABLE_H3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L1) | `ENABLE_LIBRARIES` | Enable H3 | |
| <a name="enable-hdfs"></a>[`ENABLE_HDFS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L2) | `ENABLE_LIBRARIES` | Enable HDFS | |
| <a name="enable-icu"></a>[`ENABLE_ICU`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L2) | `ENABLE_LIBRARIES` | Enable ICU | |
| <a name="enable-ldap"></a>[`ENABLE_LDAP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L5) | `ENABLE_LIBRARIES` | Enable LDAP | |
| <a name="enable-libpqxx"></a>[`ENABLE_LIBPQXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libpqxx.cmake#L1) | `ENABLE_LIBRARIES` | Enable libpqxx | |
| <a name="enable-msgpack"></a>[`ENABLE_MSGPACK`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L1) | `ENABLE_LIBRARIES` | Enable msgpack library | |
| <a name="enable-mysql"></a>[`ENABLE_MYSQL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L2) | `ENABLE_LIBRARIES` | Enable MySQL | |
| <a name="enable-nuraft"></a>[`ENABLE_NURAFT`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/nuraft.cmake#L1) | `ENABLE_LIBRARIES` | Enable NuRaft | |
| <a name="enable-odbc"></a>[`ENABLE_ODBC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ODBC library | |
| <a name="enable-orc"></a>[`ENABLE_ORC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ORC | |
| <a name="enable-parquet"></a>[`ENABLE_PARQUET`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L2) | `ENABLE_LIBRARIES` | Enable parquet | |
| <a name="enable-protobuf"></a>[`ENABLE_PROTOBUF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L1) | `ENABLE_LIBRARIES` | Enable protobuf | |
| <a name="enable-rapidjson"></a>[`ENABLE_RAPIDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L1) | `ENABLE_LIBRARIES` | Use rapidjson | |
| <a name="enable-rdkafka"></a>[`ENABLE_RDKAFKA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L1) | `ENABLE_LIBRARIES` | Enable kafka | |
| <a name="enable-rocksdb"></a>[`ENABLE_ROCKSDB`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L1) | `ENABLE_LIBRARIES` | Enable ROCKSDB | |
| <a name="enable-s"></a>[`ENABLE_S3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L2) | `ENABLE_LIBRARIES` | Enable S3 | |
| <a name="enable-ssl"></a>[`ENABLE_SSL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L3) | `ENABLE_LIBRARIES` | Enable ssl | Needed when securely connecting to an external server, e.g. clickhouse-client --host ... --secure |
| <a name="enable-stats"></a>[`ENABLE_STATS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/stats.cmake#L1) | `ENABLE_LIBRARIES` | Enable StatsLib library | |
### External libraries system/bundled mode
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="use-internal-avro-library"></a>[`USE_INTERNAL_AVRO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L11) | `ON` | Set to FALSE to use system avro library instead of bundled | |
| <a name="use-internal-aws-s-library"></a>[`USE_INTERNAL_AWS_S3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L14) | `ON` | Set to FALSE to use system S3 instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-brotli-library"></a>[`USE_INTERNAL_BROTLI_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libbrotli library instead of bundled | Many systems ship only dynamic brotli libraries, so we fall back to bundled by default |
| <a name="use-internal-capnp-library"></a>[`USE_INTERNAL_CAPNP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system capnproto library instead of bundled | |
| <a name="use-internal-curl"></a>[`USE_INTERNAL_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L10) | `NOT_UNBUNDLED` | Use internal curl library | |
| <a name="use-internal-grpc-library"></a>[`USE_INTERNAL_GRPC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L25) | `NOT_UNBUNDLED` | Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal gRPC framework. You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. The external gRPC framework can be installed in the system by running sudo apt-get install libgrpc++-dev protobuf-compiler-grpc |
| <a name="use-internal-gtest-library"></a>[`USE_INTERNAL_GTEST_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gtest.cmake#L3) | `NOT_UNBUNDLED` | Set to FALSE to use system Google Test instead of bundled | |
| <a name="use-internal-h-library"></a>[`USE_INTERNAL_H3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L9) | `ON` | Set to FALSE to use system h3 library instead of bundled | |
| <a name="use-internal-hdfs-library"></a>[`USE_INTERNAL_HDFS3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L14) | `ON` | Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk) | |
| <a name="use-internal-icu-library"></a>[`USE_INTERNAL_ICU_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system ICU library instead of bundled | |
| <a name="use-internal-ldap-library"></a>[`USE_INTERNAL_LDAP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system *LDAP library instead of bundled | |
| <a name="use-internal-libcxx-library"></a>[`USE_INTERNAL_LIBCXX_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L15) | `USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT` | Disable to use system libcxx and libcxxabi libraries instead of bundled | |
| <a name="use-internal-libgsasl-library"></a>[`USE_INTERNAL_LIBGSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libgsasl library instead of bundled | When USE_STATIC_LIBRARIES is set, we usually need to pick up a hell of a lot of dependencies for libgsasl |
| <a name="use-internal-libxml-library"></a>[`USE_INTERNAL_LIBXML2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libxml2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system libxml2 library instead of bundled | |
| <a name="use-internal-llvm-library"></a>[`USE_INTERNAL_LLVM_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L8) | `NOT_UNBUNDLED` | Use bundled or system LLVM library. | |
| <a name="use-internal-msgpack-library"></a>[`USE_INTERNAL_MSGPACK_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system msgpack library instead of bundled | |
| <a name="use-internal-mysql-library"></a>[`USE_INTERNAL_MYSQL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system mysqlclient library instead of bundled | |
| <a name="use-internal-odbc-library"></a>[`USE_INTERNAL_ODBC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L22) | `NOT_UNBUNDLED` | Use internal ODBC library | |
| <a name="use-internal-orc-library"></a>[`USE_INTERNAL_ORC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L11) | `ON` | Set to FALSE to use system ORC instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-parquet-library"></a>[`USE_INTERNAL_PARQUET_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L16) | `NOT_UNBUNDLED` | Set to FALSE to use system parquet library instead of bundled | |
| <a name="use-internal-poco-library"></a>[`USE_INTERNAL_POCO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/poco.cmake#L1) | `ON` | Use internal Poco library | |
| <a name="use-internal-protobuf-library"></a>[`USE_INTERNAL_PROTOBUF_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal protobuf library. You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. The external protobuf library can be installed in the system by running sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev |
| <a name="use-internal-rapidjson-library"></a>[`USE_INTERNAL_RAPIDJSON_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L9) | `NOT_UNBUNDLED` | Set to FALSE to use system rapidjson library instead of bundled | |
| <a name="use-internal-rdkafka-library"></a>[`USE_INTERNAL_RDKAFKA_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system librdkafka instead of the bundled | |
| <a name="use-internal-re-library"></a>[`USE_INTERNAL_RE2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/re2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system re2 library instead of bundled [slower] | |
| <a name="use-internal-rocksdb-library"></a>[`USE_INTERNAL_ROCKSDB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system ROCKSDB library instead of bundled | |
| <a name="use-internal-snappy-library"></a>[`USE_INTERNAL_SNAPPY_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system snappy library instead of bundled | |
| <a name="use-internal-sparsehash-library"></a>[`USE_INTERNAL_SPARSEHASH_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sparsehash.cmake#L1) | `ON` | Set to FALSE to use system sparsehash library instead of bundled | |
| <a name="use-internal-ssl-library"></a>[`USE_INTERNAL_SSL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L12) | `NOT_UNBUNDLED` | Set to FALSE to use system *ssl library instead of bundled | |
| <a name="use-internal-zlib-library"></a>[`USE_INTERNAL_ZLIB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zlib.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zlib library instead of bundled | |
| <a name="use-internal-zstd-library"></a>[`USE_INTERNAL_ZSTD_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zstd.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zstd library instead of bundled | |
### Other flags
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="add-gdb-index-for-gold"></a>[`ADD_GDB_INDEX_FOR_GOLD`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L195) | `OFF` | Add .gdb-index to resulting binaries for gold linker. | Ignored if `lld` is used |
| <a name="arch-native"></a>[`ARCH_NATIVE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L248) | `OFF` | Add -march=native compiler flag | |
| <a name="clickhouse-split-binary"></a>[`CLICKHOUSE_SPLIT_BINARY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L98) | `OFF` | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
| <a name="compiler-pipe"></a>[`COMPILER_PIPE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L235) | `ON` | -pipe compiler option | Less `/tmp` usage, more RAM usage. |
| <a name="enable-check-heavy-builds"></a>[`ENABLE_CHECK_HEAVY_BUILDS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L69) | `OFF` | Don't allow C++ translation units to compile too long or to take too much memory while compiling | |
| <a name="enable-fuzzing"></a>[`ENABLE_FUZZING`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L115) | `OFF` | Fuzzy testing using libfuzzer | Implies `WITH_COVERAGE` |
| <a name="enable-libraries"></a>[`ENABLE_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L357) | `ON` | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
| <a name="enable-multitarget-code"></a>[`ENABLE_MULTITARGET_CODE`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L100) | `ON` | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. `ifdef ENABLE_MULTITARGET`). If turned ON, this option defines such macro. See `src/Functions/TargetSpecific.h` |
| <a name="enable-tests"></a>[`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L154) | `ON` | Provide unit_test_dbms target with Google.Test unit tests | If turned `ON`, assumes the user has either the system GTest library or the bundled one. |
| <a name="enable-thinlto"></a>[`ENABLE_THINLTO`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L313) | `ON` | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
| <a name="fail-on-unsupported-options-combination"></a>[`FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32) | `ON` | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
| <a name="glibc-compatibility"></a>[`GLIBC_COMPATIBILITY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L159) | `ON` | Enable compatibility with older glibc libraries. | Only for Linux, x86_64. Implies `ENABLE_FASTMEMCPY` |
| <a name="linker-name"></a>[`LINKER_NAME`](https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L44) | `OFF` | Linker name or full path | Example values: `lld-10`, `gold`. |
| <a name="llvm-has-rtti"></a>[`LLVM_HAS_RTTI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L40) | `ON` | Enable if LLVM was built with RTTI enabled | |
| <a name="make-static-libraries"></a>[`MAKE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L91) | `USE_STATIC_LIBRARIES` | Disable to make shared libraries | |
| <a name="parallel-compile-jobs"></a>[`PARALLEL_COMPILE_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10) | `""` | Maximum number of concurrent compilation jobs | 1 if not set |
| <a name="parallel-link-jobs"></a>[`PARALLEL_LINK_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13) | `""` | Maximum number of concurrent link jobs | 1 if not set |
| <a name="sanitize"></a>[`SANITIZE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7) | `""` | Enable one of the code sanitizers | Possible values: - `address` (ASan) - `memory` (MSan) - `thread` (TSan) - `undefined` (UBSan) - "" (no sanitizing) |
| <a name="split-shared-libraries"></a>[`SPLIT_SHARED_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L96) | `OFF` | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
| <a name="strip-debug-symbols-functions"></a>[`STRIP_DEBUG_SYMBOLS_FUNCTIONS`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L49) | `STRIP_DSF_DEFAULT` | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables). |
| <a name="unbundled"></a>[`UNBUNDLED`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L363) | `OFF` | Use system libraries instead of ones in contrib/ | We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your system. This mode exists for enthusiastic developers who are searching for trouble. Useful for maintainers of OS packages. |
| <a name="use-include-what-you-use"></a>[`USE_INCLUDE_WHAT_YOU_USE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L418) | `OFF` | Automatically reduce unneeded includes in source code (external tool) | https://github.com/include-what-you-use/include-what-you-use |
| <a name="use-libcxx"></a>[`USE_LIBCXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L1) | `NOT_UNBUNDLED` | Use libc++ and libc++abi instead of libstdc++ | |
| <a name="use-sentry"></a>[`USE_SENTRY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sentry.cmake#L13) | `ENABLE_LIBRARIES` | Use Sentry | |
| <a name="use-simdjson"></a>[`USE_SIMDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/simdjson.cmake#L1) | `ENABLE_LIBRARIES` | Use simdjson | |
| <a name="use-snappy"></a>[`USE_SNAPPY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L1) | `ENABLE_LIBRARIES` | Enable snappy library | |
| <a name="use-static-libraries"></a>[`USE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L90) | `ON` | Disable to use shared libraries | |
| <a name="use-unwind"></a>[`USE_UNWIND`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/unwind.cmake#L1) | `ENABLE_LIBRARIES` | Enable libunwind (better stacktraces) | |
| <a name="werror"></a>[`WERROR`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L373) | `OFF` | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
| <a name="weverything"></a>[`WEVERYTHING`](https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L22) | `ON` | Enable -Weverything option with some exceptions. | Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only |
| <a name="with-coverage"></a>[`WITH_COVERAGE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L274) | `OFF` | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
## Developer's guide for adding new CMake options
### Don't be obvious. Be informative.
Bad:
```cmake
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless as it neither gives the viewer any additional information nor explains the option's purpose.
Better:
```cmake
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some
pre-conditions, leave a comment above the `option()` line and explain what it does.
The best way would be linking the docs page (if it exists).
The comment is parsed into a separate column (see below).
Even better:
```cmake
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong.
Also, such options should be disabled if applicable.
Bad:
```cmake
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```cmake
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables).
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment.
You may find that the option's name is self-descriptive.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```cmake
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```cmake
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
Other example, bad:
```cmake
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```cmake
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`.
Prefer the `ON/OFF` values, if possible.

View File

@ -52,7 +52,7 @@ Engines in the family:
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3)
- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3)
### Special Engines {#special-engines}

View File

@ -3,23 +3,23 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# S3 Table Engine {#table-engine-s3}
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar
to the [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs) engine, but provides S3-specific features.
This engine provides integration with the [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs) engine, but provides S3-specific features.
## Usage {#usage}
## Create Table {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
**Example:**
@ -47,6 +47,12 @@ SELECT * FROM s3_engine_table LIMIT 2
│ two │ 2 │
└──────┴───────┘
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Implementation Details {#implementation-details}
@ -56,9 +62,9 @@ SELECT * FROM s3_engine_table LIMIT 2
- Indexes.
- Replication.
**Globs in path**
## Wildcards In Path {#wildcards-in-path}
Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment).
The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file must exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` time).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
@ -67,80 +73,29 @@ Multiple path components can have globs. For being processed file should exist a
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
**Example**
1. Suppose we have several files in CSV format with the following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. There are several ways to make a table consisting of all six files:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Another way:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-related settings {#settings}
## S3-related Settings {#s3-settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size`Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size`Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects`Default value is `10`. Max number of HTTP redirects S3 hops allowed.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
- `s3_max_redirects` — Max number of S3 redirect hops allowed. Default value is `10`.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
### Endpoint-based settings {#endpointsettings}
## Endpoint-based Settings {#endpoint-settings}
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
- `endpoint`Mandatory. Specifies prefix of an endpoint.
- `access_key_id` and `secret_access_key`Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials`Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header`Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
- `server_side_encryption_customer_key_base64`Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `endpoint` — Specifies prefix of an endpoint. Mandatory.
- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
Example:
**Example:**
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -152,5 +107,50 @@ Example:
</endpoint-name>
</s3>
```
## Usage {#usage-examples}
Suppose we have several files in CSV format with the following URIs on S3:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. There are several ways to make a table consisting of all six files:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Another way:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
4. Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
## See also
- [S3 table function](../../../sql-reference/table-functions/s3.md)
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -123,5 +123,6 @@ toc_title: Adopters
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
| <a href="https://www.tesla.com/" class="favicon">Tesla</a> | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse supports authenticating and managing users using external services.
The following external authenticators and directories are supported:
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
[Original article](https://clickhouse.tech/docs/en/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,115 @@
# Kerberos {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths. Those users may only use HTTP requests and must be able to authenticate using GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in ClickHouse config.
## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickhouse}
To enable Kerberos, one should include `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters:
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
- This parameter is optional, if omitted, the default principal will be used.
- `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
<!-- ... -->
<kerberos />
</yandex>
```
With principal specification:
```xml
<yandex>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
With filtering by realm:
```xml
<yandex>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! warning "Note"
You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
`principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication.
## Kerberos as an external authenticator for existing users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be *kerberized* (via GSS-SPNEGO mechanism).
Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **The *primary* part of the canonical principal name of the initiator is expected to match the kerberized user name for authentication to succeed**.
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
In order to enable Kerberos authentication for the user, specify `kerberos` section instead of `password` or similar sections in the user definition.
Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
<!-- ... -->
<users>
<!-- ... -->
<my_user>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! warning "Warning"
Note that Kerberos authentication cannot be used alongside any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shut down.
!!! info "Reminder"
Note that once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -1883,6 +1883,53 @@ Possible values:
Default value: `0`.
## insert_shard_id {#insert_shard_id}
If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md#distributed) table into which the data will be inserted synchronously.
If `insert_shard_id` value is incorrect, the server will throw an exception.
To get the number of shards on `requested_cluster`, you can check server config or use this query:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Possible values:
- 0 — Disabled.
- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
Default value: `0`.
**Example**
Query:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Result:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine.

View File

@ -17,5 +17,3 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_limits) <!--hide-->

View File

@ -28,5 +28,3 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_usage) <!--hide-->

View File

@ -30,6 +30,4 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas_usage) <!--hide-->
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,16 +4,66 @@ toc_priority: 141
# deltaSum {#agg_functions-deltasum}
Syntax: `deltaSum(value)`
Sums the arithmetic difference between consecutive rows. If the difference is negative, it is ignored.
Adds the differences between consecutive rows. If the difference is negative, it is ignored.
`value` must be some integer or floating point type.
**Syntax**
Example:
```sql
select deltaSum(arrayJoin([1, 2, 3])); -- => 2
select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7
select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
``` sql
deltaSum(value)
```
**Arguments**
- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type.
**Returned value**
- The accumulated arithmetic difference, of the `Integer` or `Float` type.
**Examples**
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Result:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## See Also {#see-also}
- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference)

View File

@ -36,4 +36,4 @@ For the default user limit the maximum execution time with half a second in 30 m
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -18,7 +18,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.

View File

@ -62,7 +62,7 @@ Note that materialized view is influenced by [optimize_on_insert](../../../opera
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There isnt a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). `DROP TABLE` works for views as well.
## Live View (Experimental) {#live-view}

View File

@ -3,17 +3,19 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# S3 Table Function {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Arguments**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
@ -22,14 +24,14 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
A table with the specified structure for reading or writing data in the specified file.
**Example**
**Examples**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Selecting the first two rows from the table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv`:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
``` text
@ -44,7 +46,7 @@ The similar but from file with `gzip` compression:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
LIMIT 2;
```
``` text
@ -54,33 +56,20 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Usage {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Suppose that we have several files with the following URIs on S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Count the amount of rows in files ending with numbers from 1 to 3:
``` sql
SELECT count(*)
@ -93,9 +82,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Count the total amount of rows in all files in these two directories:
``` sql
SELECT count(*)
@ -108,17 +95,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +111,22 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Insert data into file `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Insert data into file `test-data.csv.gz` from existing table:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
- [S3 engine](../../engines/table-engines/integrations/s3.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -3,144 +3,92 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# Движок таблиц S3 {#table-engine-s3}
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Этот движок похож на
движок [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs), но предоставляет S3-специфичные функции.
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Он похож на движок [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности.
## Использование {#usage}
## Создание таблицы {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Параметры**
**Параметры движка**
- `path` — URL ссылающийся на файл расположенный в S3. В режиме для чтения можно читать несколько файлов как один, поддерживаются следующие шаблоны для указания маски пути к файлам: *, ?, {abc,def} и {N..M} где N, M — числа, `abc, def — строки.
- `format` — [Формат](../../../interfaces/formats.md#formats) файла.
- `structure` — Структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Алгоритм сжатия, необязательный параметр. Поддерживаемые значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. По умолчанию, алгоритм сжатия будет автоматически применен в зависимости от расширения в имени файла.
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [ниже](#wildcards-in-path).
- `format` — [формат](../../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
**Пример:**
**Пример**
**1.** Создание таблицы `s3_engine_table` :
```sql
CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
SELECT * FROM s3_engine_table LIMIT 2;
```
**2.** Заполнение файла:
```sql
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Запрос данных:
```sql
SELECT * FROM s3_engine_table LIMIT 2
```
```text
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Виртуальные столбцы {#virtual-columns}
- `_path` — путь к файлу.
- `_file` — имя файла.
Подробнее про виртуальные столбцы можно прочитать [здесь](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Детали реализации {#implementation-details}
- Чтение и запись могут быть одновременными и параллельными
- Не поддерживается:
- `ALTER` и `SELECT...SAMPLE` операции.
- Индексы.
- Репликация.
- Чтение и запись могут быть параллельными.
- Не поддерживаются:
- запросы `ALTER` и `SELECT...SAMPLE`,
- индексы,
- репликация.
**Поддержка шаблонов в параметре path**
## Символы подстановки {#wildcards-in-path}
Множество частей параметра `path` поддерживает шаблоны. Для того чтобы быть обработанным, файл должен присутствовать в S3 и соответствовать шаблону. Списки файлов определяются в момент `SELECT` (но не в момент `CREATE`).
Аргумент `path` может указывать на несколько файлов, используя подстановочные знаки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
- `*` — Заменяет любое количество любых символов кроме `/` включая пустые строки.
- `?` — Заменяет один символ.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Заменяет любое число в диапазоне от N до M включительно. N и M могут иметь лидирующие нули, например `000..078`.
- `*` — заменяет любое количество любых символов, кроме `/`, включая пустую строку.
- `?` — заменяет любой одиночный символ.
- `{some_string, another_string, yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — заменяет любое число от N до M, включая обе границы. N и M могут иметь ведущие нули, например `000..078`.
Конструкции с `{}` работают так же, как в табличной функции [remote](../../../sql-reference/table-functions/remote.md).
Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md).
## Настройки движка S3 {#s3-settings}
Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки:
- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64 Mб`.
- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512 Mб`.
- `s3_max_redirects` — максимальное количество разрешенных переадресаций S3. Значение по умолчанию — `10`.
Соображение безопасности: если злонамеренный пользователь может указывать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.
## Настройки конечных точек {#endpoint-settings}
Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
Обязательная настройка:
- `endpoint` — указывает префикс конечной точки.
Необязательные настройки:
- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой.
- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`.
- `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз.
- `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C.
**Пример**
1. Предположим у нас есть некоторые файлы в CSV формате со следующими URIs в S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. Есть несколько способов сделать таблицу, состоящую из всех шести файлов:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Другой способ:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Таблица состоящая из всех файлах в обоих каталогах (все файлы должны удовлетворять формату и схеме описанными в запросе):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Предупреждение"
Если список файлов содержит диапазоны номеров с ведущими нулями, используйте конструкции со скобками для каждой цифры или используйте `?`.
**Пример**
Создание таблицы с именами файлов `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Виртуальные колонки {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**Смотри также**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-специфичные настройки {#settings}
Следующие настройки могут быть заданы при запуске запроса или установлены в конфигурационном файле для пользовательского профиля.
- `s3_max_single_part_upload_size` — По умолчанию `64Mb`. Максимальный размер куска данных для загрузки в S3 как singlepart.
- `s3_min_upload_part_size` — По умолчанию `512Mb`. Минимальный размер куска данных для загрузки в S3 с помощью [S3 Multipart загрузки](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Значение по умолчанию `10`. Максимально допустимое количество HTTP перенаправлений от серверов S3.
Примечания для безопасности: если злоумышленник может указать произвольные ссылки на S3, то лучше выставить `s3_max_redirects` как ноль для избежания атак типа [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) ; или ограничить с помощью `remote_host_filter` список адресов по которым возможно взаимодействие с S3.
### Настройки специфичные для заданной конечной точки {#endpointsettings}
Следующие настройки могут быть указаны в конфигурационном файле для заданной конечной точки (которой будет сопоставлен точный конечный префикс URL):
- `endpoint` — Обязательный параметр. Указывает префикс URL для конечной точки.
- `access_key_id` и `secret_access_key` — Необязательно. Задает параметры авторизации для заданной конечной точки.
- `use_environment_credentials` — Необязательный параметр, значение по умолчанию `false`. Если установлено как `true`, S3 клиент будет пытаться получить параметры авторизации из переменных окружения и Amazon EC2 метаданных для заданной конечной точки.
- `header` — Необязательный параметр, может быть указан несколько раз. Добавляет указанный HTTP заголовок к запросу для заданного в `endpoint` префикса URL.
- `server_side_encryption_customer_key_base64` — Необязательный параметр. Если указан, к запросам будут добавлены заголовки, необходимые для доступа к S3 объектам с SSE-C шифрованием.
Пример:
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -153,3 +101,50 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
</s3>
```
## Примеры использования {#usage-examples}
Предположим, у нас есть несколько файлов в формате CSV со следующими URL-адресами в S3:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. Существует несколько способов создать таблицу, включающую в себя все шесть файлов:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Другой способ:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Таблица содержит все файлы в обоих каталогах (все файлы должны соответствовать формату и схеме, описанным в запросе):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
**Смотрите также**
- [Табличная функция S3](../../../sql-reference/table-functions/s3.md)
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse поддерживает аутентификацию и управл
Поддерживаются следующие внешние аутентификаторы и каталоги:
- [LDAP](./ldap.md#external-authenticators-ldap) [аутентификатор](./ldap.md#ldap-external-authenticator) и [каталог](./ldap.md#ldap-external-user-directory)
- Kerberos [аутентификатор](./kerberos.md#external-authenticators-kerberos)
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,118 @@
# Kerberos {#external-authenticators-kerberos}
ClickHouse предоставляет возможность аутентификации существующих (и правильно сконфигурированных) пользователей с использованием Kerberos.
В настоящее время возможно использование Kerberos только как внешнего аутентификатора, то есть для аутентификации уже существующих пользователей с помощью Kerberos. Пользователи, настроенные для Kerberos-аутентификации, могут работать с ClickHouse только через HTTP-интерфейс, причём сами клиенты должны иметь возможность аутентификации с использованием механизма GSS-SPNEGO.
!!! info "!!!"
Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
## Настройка Kerberos в ClickHouse {#enabling-kerberos-in-clickhouse}
Для того, чтобы задействовать Kerberos-аутентификацию в ClickHouse, в первую очередь необходимо добавить одну-единственную секцию `kerberos` в `config.xml`.
В секции могут быть указаны дополнительные параметры:
- `principal` &mdash; задаёт имя принципала (canonical service principal name, SPN), используемое при авторизации ClickHouse на Kerberos-сервере.
- Это опциональный параметр, при его отсутствии будет использовано стандартное имя.
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации.
- Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет.
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<!-- ... -->
<kerberos />
</yandex>
```
Или, с указанием принципала:
```xml
<yandex>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! Warning "Важно"
В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей.
!!! Warning "Важно"
В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей.
## Аутентификация пользователей с помощью Kerberos {#kerberos-as-an-external-authenticator-for-existing-users}
Уже существующие пользователи могут воспользоваться аутентификацией с помощью Kerberos. Однако, Kerberos-аутентификация возможна только при использовании HTTP-интерфейса.
Имя принципала (principal name) обычно имеет вид:
- *primary/instance@REALM*
Для успешной аутентификации необходимо, чтобы *primary* совпало с именем пользователя ClickHouse, настроенного для использования Kerberos.
### Настройка Kerberos в `users.xml` {#enabling-kerberos-in-users-xml}
Для того, чтобы пользователь имел возможность производить аутентификацию с помощью Kerberos, достаточно включить секцию `kerberos` в описание пользователя в `users.xml` (например, вместо секции `password` или аналогичной ей).
В секции могут быть указаны дополнительные параметры:
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm): аутентификация будет возможна только при совпадении реалм клиента с указанным.
- Этот параметр является опциональным, при его отсутствии фильтрация применяться не будет.
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<!-- ... -->
<users>
<!-- ... -->
<my_user>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! Warning "Важно"
Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
!!! info ""
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
### Настройка Kerberos через SQL {#enabling-kerberos-using-sql}
Пользователей, использующих Kerberos-аутентификацию, можно создать не только с помощью изменения конфигурационных файлов.
Если SQL-ориентированное управление доступом включено в ClickHouse, можно также создать пользователя, работающего через Kerberos, с помощью SQL.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
Или, без фильтрации по реалм:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -29,6 +29,8 @@ toc_title: "Квоты"
<!-- Без ограничений. Просто считать соответствующие данные за указанный интервал. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
@ -48,6 +50,8 @@ toc_title: "Квоты"
<duration>3600</duration>
<queries>1000</queries>
<query_selects>100</query_selects>
<query_inserts>100</query_inserts>
<errors>100</errors>
<result_rows>1000000000</result_rows>
<read_rows>100000000000</read_rows>
@ -58,6 +62,8 @@ toc_title: "Квоты"
<duration>86400</duration>
<queries>10000</queries>
<query_selects>10000</query_selects>
<query_inserts>10000</query_inserts>
<errors>1000</errors>
<result_rows>5000000000</result_rows>
<read_rows>500000000000</read_rows>
@ -74,6 +80,10 @@ toc_title: "Квоты"
`queries` - общее количество запросов;
`query_selects` - общее количество запросов `SELECT`;
`query_inserts` - общее количество запросов `INSERT`;
`errors` - количество запросов, при выполнении которых было выкинуто исключение;
`result_rows` - суммарное количество строк, отданных в виде результата;

View File

@ -1759,6 +1759,54 @@ ClickHouse генерирует исключение
- [Движок Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
## insert_shard_id {#insert_shard_id}
Если не `0`, указывает, в какой шард [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы данные будут вставлены синхронно.
Если значение настройки `insert_shard_id` указано неверно, сервер выдаст ошибку.
Узнать количество шардов `shard_num` на кластере `requested_cluster` можно из конфигурации сервера, либо используя запрос:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Возможные значения:
- 0 — выключено.
- Любое число от `1` до `shards_num` соответствующей [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы.
Значение по умолчанию: `0`.
**Пример**
Запрос:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Результат:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## validate_polygons {#validate_polygons}
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.

View File

@ -4,16 +4,17 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — Интервал не рандомизирован.
- `1` — Интервал рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса, в секундах.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — интервал не рандомизирован.
- `1` — интервал рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `SELECT`.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `INSERT`.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса, в секундах.

View File

@ -4,27 +4,28 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — Значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,28 +4,31 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — Ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of source rows read from tables.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `INSERT` на этом интервале.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк, приведенных в результате.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -0,0 +1,69 @@
---
toc_priority: 141
---
# deltaSum {#agg_functions-deltasum}
Суммирует арифметическую разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована.
**Синтаксис**
``` sql
deltaSum(value)
```
**Аргументы**
- `value` — входные значения, должны быть типа [Integer](../../data-types/int-uint.md) или [Float](../../data-types/float.md).
**Возвращаемое значение**
- накопленная арифметическая разница, типа `Integer` или `Float`.
**Примеры**
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## Смотрите также {#see-also}
- [runningDifference](../../functions/other-functions.md#runningdifferencex)

View File

@ -14,14 +14,14 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -37,5 +37,4 @@ ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -13,13 +13,13 @@ toc_title: "Квота"
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -35,6 +35,4 @@ CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
<!--hide-->
```

View File

@ -60,5 +60,5 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.
Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.
Чтобы удалить представление, следует использовать [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Впрочем, `DROP TABLE` тоже работает для представлений.

View File

@ -23,15 +23,15 @@ toc_title: "Введение"
| Функция | Описание |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](../../sql-reference/table-functions/merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](../../sql-reference/table-functions/numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](../../sql-reference/table-functions/remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](../../sql-reference/table-functions/url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](../../sql-reference/table-functions/mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Создаёт таблицу с движком [PostgreSQL](../../engines/table-engines/integrations/postgresql.md). |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](../../sql-reference/table-functions/odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](../../sql-reference/table-functions/s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
| [file](file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [jdbc](jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/) <!--hide-->

View File

@ -3,35 +3,41 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# Табличная функция S3 {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Предоставляет табличный интерфейс для выбора/вставки файлов в [Amazon S3](https://aws.amazon.com/s3/). Эта табличная функция похожа на [hdfs](../../sql-reference/table-functions/hdfs.md), но обеспечивает специфические для S3 возможности.
**Синтаксис**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Аргументы**
- `path`Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `format`The [format](../../interfaces/formats.md#formats) of the file.
- `structure`Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — необязательный параметр. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. По умолчанию сжатие определяется автоматически по расширению файла.
**Returned value**
**Возвращаемые значения**
A table with the specified structure for reading or writing data in the specified file.
Таблица с указанной структурой для чтения или записи данных в указанный файл.
**Example**
**Примеры**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Создание таблицы из файла S3 `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` и выбор первых двух строк из нее:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -39,14 +45,18 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
The similar but from file with `gzip` compression:
То же самое, но файл со сжатием `gzip`:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -54,37 +64,24 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Примеры использования {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Предположим, у нас есть несколько файлов со следующими URI на S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Подсчитаем количество строк в файлах, заканчивающихся цифрами от 1 до 3:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -93,13 +90,11 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Подсчитаем общее количество строк во всех файлах этих двух каталогов:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32');
```
``` text
@ -108,17 +103,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +119,23 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Запишем данные в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Запишем данные из существующей таблицы в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
**Смотрите также**
- `_path` — Path to the file.
- `_file` — Name of the file.
- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md)
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -17,7 +17,7 @@ toc_title: ClickHouse的特性
在一些列式数据库管理系统中(例如InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外,ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create.md#create-query-specialized-codecs),这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}

View File

@ -63,7 +63,7 @@ enum class AccessType
M(ALTER_SETTINGS, "ALTER SETTING, ALTER MODIFY SETTING, MODIFY SETTING", TABLE, ALTER_TABLE) /* allows to execute ALTER MODIFY SETTING */\
M(ALTER_MOVE_PARTITION, "ALTER MOVE PART, MOVE PARTITION, MOVE PART", TABLE, ALTER_TABLE) \
M(ALTER_FETCH_PARTITION, "FETCH PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \
\
M(ALTER_TABLE, "", GROUP, ALTER) \
\

View File

@ -413,7 +413,7 @@ public:
for (const Field & f : keys_to_keep_)
{
keys_to_keep.emplace(f.safeGet<NearestFieldType<T>>());
keys_to_keep.emplace(f.safeGet<T>());
}
}

View File

@ -255,7 +255,7 @@ public:
/// The constant value. It is valid even if the size of the column is 0.
template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
T getValue() const { return getField().safeGet<T>(); }
bool isCollationSupported() const override { return data->isCollationSupported(); }
};

View File

@ -107,7 +107,7 @@ public:
{
data.resize_fill(data.size() + length);
}
void insert(const Field & x) override { data.push_back(DB::get<NearestFieldType<T>>(x)); }
void insert(const Field & x) override { data.push_back(DB::get<T>(x)); }
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t n) override

View File

@ -261,7 +261,7 @@ public:
void insert(const Field & x) override
{
data.push_back(DB::get<NearestFieldType<T>>(x));
data.push_back(DB::get<T>(x));
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -52,7 +52,7 @@
M(RWLockActiveWriters, "Number of threads holding write lock in a table RWLock.") \
M(GlobalThread, "Number of threads in global thread pool.") \
M(GlobalThreadActive, "Number of threads in global thread pool running a task.") \
M(LocalThread, "Number of threads in local thread pools. Should be similar to GlobalThreadActive.") \
M(LocalThread, "Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool.") \
M(LocalThreadActive, "Number of threads in local thread pools running a task.") \
M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed.") \
M(TablesToDropQueueSize, "Number of dropped tables, that are waiting for background data removal.") \

View File

@ -14,7 +14,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2

View File

@ -13,7 +13,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2

View File

@ -399,10 +399,10 @@ public:
template <typename T>
T & get();
NearestFieldType<std::decay_t<T>> & get();
template <typename T>
const T & get() const
const auto & get() const
{
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->get<T>();
@ -436,21 +436,10 @@ public:
return true;
}
template <typename T> T & safeGet()
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet() const
{ return const_cast<Field *>(this)->safeGet<T>(); }
template <typename T> const T & safeGet() const
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet();
bool operator< (const Field & rhs) const
{
@ -778,22 +767,40 @@ inline constexpr bool isInt64FieldType(Field::Types::Which t)
// Field value getter with type checking in debug builds.
template <typename T>
T & Field::get()
NearestFieldType<std::decay_t<T>> & Field::get()
{
using ValueType = std::decay_t<T>;
// Before storing the value in the Field, we static_cast it to the field
// storage type, so here we return the value of storage type as well.
// Otherwise, it is easy to make a mistake of reinterpret_casting the stored
// value to a different and incompatible type.
// For example, a Float32 value is stored as Float64, and it is incorrect to
// return a reference to this value as Float32.
using StoredType = NearestFieldType<std::decay_t<T>>;
#ifndef NDEBUG
// Disregard signedness when converting between int64 types.
constexpr Field::Types::Which target = TypeToEnum<NearestFieldType<ValueType>>::value;
constexpr Field::Types::Which target = TypeToEnum<StoredType>::value;
if (target != which
&& (!isInt64FieldType(target) || !isInt64FieldType(which)))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", Types::toString(which), Types::toString(target));
#endif
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
StoredType * MAY_ALIAS ptr = reinterpret_cast<StoredType *>(&storage);
return *ptr;
}
// Checked accessor for the value held by the Field.
// The requested type T is decayed and mapped to NearestFieldType, and the
// type tag of that mapped type is compared against the tag currently stored
// in `which`. On mismatch an Exception with ErrorCodes::BAD_GET is thrown,
// naming both the held type and the requested one; otherwise the call is
// forwarded to get<T>() and its result is returned by reference.
template <typename T>
auto & Field::safeGet()
{
// Compare against the tag of the nearest field type rather than of T itself.
const Types::Which requested = TypeToEnum<NearestFieldType<std::decay_t<T>>>::value;
// This check is not wrapped in any #ifndef NDEBUG guard.
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T>
T & Field::reinterpret()
{

View File

@ -220,8 +220,8 @@ class IColumn;
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
\
/** Settings for testing hedged requests */ \
M(Int64, sleep_in_send_tables_status, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Int64, sleep_in_send_data, 0, "Time to sleep in sending data in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
\
M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \

View File

@ -105,7 +105,7 @@ DataTypeEnum<Type>::DataTypeEnum(const Values & values_) : values{values_}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const FieldType x = get<NearestFieldType<FieldType>>(field);
const FieldType x = get<FieldType>(field);
writeBinary(x, ostr);
}
@ -405,7 +405,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
const String & field_name = name_literal->value.get<String>();
const auto value = value_literal->value.get<NearestFieldType<FieldType>>();
const auto value = value_literal->value.get<FieldType>();
if (value > std::numeric_limits<FieldType>::max() || value < std::numeric_limits<FieldType>::min())
throw Exception{"Value " + toString(value) + " for element '" + field_name + "' exceeds range of " + EnumName<FieldType>::value,

View File

@ -152,7 +152,7 @@ template <typename T>
void DataTypeNumberBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
/// ColumnVector<T>::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64
typename ColumnVector<T>::ValueType x = get<NearestFieldType<FieldType>>(field);
typename ColumnVector<T>::ValueType x = get<FieldType>(field);
writeBinary(x, ostr);
}

View File

@ -338,7 +338,7 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated()
template <typename T>
void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -450,7 +450,7 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const
}
}
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -370,7 +370,7 @@ void FlatDictionary::calculateBytesAllocated()
template <typename T>
void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
const auto & null_value_ref = std::get<T>(attribute.null_values);
attribute.arrays.emplace<ContainerType<T>>(initial_array_size, null_value_ref);
}
@ -478,7 +478,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
}
}
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -451,7 +451,7 @@ void HashedDictionary::calculateBytesAllocated()
template <typename T>
void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
if (!sparse)
attribute.maps = std::make_unique<CollectionType<T>>();
else
@ -565,7 +565,7 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
}
}
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -595,7 +595,7 @@ void IPAddressDictionary::calculateBytesAllocated()
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<NearestFieldType<T>>());
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -786,7 +786,7 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field &
}
else
{
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, value.get<AttributeType>());
}
};

View File

@ -350,7 +350,7 @@ void RangeHashedDictionary::calculateBytesAllocated()
template <typename T>
void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps = std::make_unique<Collection<T>>();
}
@ -458,7 +458,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
value_to_insert = Value<ValueType>{ range, { value.get<ValueType>() }};
}
}

View File

@ -209,7 +209,13 @@ void DiskCacheWrapper::clearDirectory(const String & path)
/// Moves `from_path` to `to_path` on the cache disk when the source exists there,
/// then forwards the same move to DiskDecorator::moveDirectory.
void DiskCacheWrapper::moveDirectory(const String & from_path, const String & to_path)
{
    const bool present_in_cache = cache_disk->exists(from_path);
    if (present_in_cache)
    {
        /// A previous, failed move attempt may have left a non-empty destination directory behind;
        /// clear it before moving.
        const bool destination_is_directory = cache_disk->exists(to_path) && cache_disk->isDirectory(to_path);
        if (destination_is_directory)
            cache_disk->clearDirectory(to_path);

        cache_disk->moveDirectory(from_path, to_path);
    }

    DiskDecorator::moveDirectory(from_path, to_path);
}

View File

@ -40,7 +40,8 @@ struct AddSecondsImpl
{
static constexpr auto name = "addSeconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta, t.fractional};
}
@ -60,7 +61,8 @@ struct AddMinutesImpl
{
static constexpr auto name = "addMinutes";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta * 60, t.fractional};
}
@ -80,7 +82,8 @@ struct AddHoursImpl
{
static constexpr auto name = "addHours";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
{
return {t.whole + delta * 3600, t.fractional};
}
@ -99,7 +102,8 @@ struct AddDaysImpl
{
static constexpr auto name = "addDays";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addDays(t.whole, delta), t.fractional};
}
@ -119,7 +123,8 @@ struct AddWeeksImpl
{
static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addWeeks(t.whole, delta), t.fractional};
}
@ -139,7 +144,8 @@ struct AddMonthsImpl
{
static constexpr auto name = "addMonths";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addMonths(t.whole, delta), t.fractional};
}
@ -159,7 +165,8 @@ struct AddQuartersImpl
{
static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addQuarters(t.whole, delta), t.fractional};
}
@ -179,7 +186,8 @@ struct AddYearsImpl
{
static constexpr auto name = "addYears";
static inline DecimalUtils::DecimalComponents<DateTime64> execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addYears(t.whole, delta), t.fractional};
}
@ -265,14 +273,16 @@ struct Adder
private:
template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], delta.getData()[i], time_zone);
}
template <typename FromType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(from, delta.getData()[i], time_zone);

View File

@ -2798,12 +2798,16 @@ private:
auto & out_data = static_cast<typename EnumType::ColumnType &>(*res).getData();
out_data.resize(size);
auto default_enum_value = result_type.getValues().front().second;
if (nullable_col)
{
for (const auto i : ext::range(0, size))
{
if (!nullable_col->isNullAt(i))
out_data[i] = result_type.getValue(col->getDataAt(i));
else
out_data[i] = default_enum_value;
}
}
else

View File

@ -49,8 +49,11 @@ public:
{}
template <typename ... Args>
inline auto execute(const DateTime64 & t, Args && ... args) const
inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const
{
/// Type conversion from float to integer may be required.
/// We are Ok with implementation specific result for out of range and denormals conversion.
if constexpr (TransformHasExecuteOverload_v<DateTime64, decltype(scale_multiplier), Args...>)
{
return wrapped_transform.execute(t, scale_multiplier, std::forward<Args>(args)...);

View File

@ -872,15 +872,18 @@ bool FunctionArrayElement::matchKeyToIndexNumberConst(
if (!data_numeric)
return false;
bool is_integer_field = Field::dispatch([](const auto & value)
std::optional<DataType> index_as_integer;
Field::dispatch([&](const auto & value)
{
return is_integer_v<std::decay_t<decltype(value)>>;
using FieldType = std::decay_t<decltype(value)>;
if constexpr (is_integer_v<FieldType> && std::is_convertible_v<FieldType, DataType>)
index_as_integer = static_cast<DataType>(value);
}, index);
if (!is_integer_field)
if (!index_as_integer)
return false;
MatcherNumberConst<DataType> matcher{data_numeric->getData(), get<DataType>(index)};
MatcherNumberConst<DataType> matcher{data_numeric->getData(), *index_as_integer};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
}

View File

@ -49,11 +49,10 @@ struct DivideIntegralByConstantImpl
#pragma GCC diagnostic ignored "-Wsign-compare"
/// Division by -1. By the way, we avoid FPE by division of the largest negative number by -1.
/// And signed integer overflow is well defined in C++20.
if (unlikely(is_signed_v<B> && b == -1))
{
for (size_t i = 0; i < size; ++i)
c_pos[i] = -a_pos[i];
c_pos[i] = -make_unsigned_t<A>(a_pos[i]); /// Avoid UBSan report in signed integer overflow.
return;
}

View File

@ -21,6 +21,7 @@ namespace ProfileEvents
extern const Event S3WriteBytes;
}
namespace DB
{
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
@ -50,9 +51,9 @@ WriteBufferFromS3::WriteBufferFromS3(
, client_ptr(std::move(client_ptr_))
, minimum_upload_part_size(minimum_upload_part_size_)
, max_single_part_upload_size(max_single_part_upload_size_)
, temporary_buffer(Aws::MakeShared<Aws::StringStream>("temporary buffer"))
, last_part_size(0)
{ }
{
allocateBuffer();
}
void WriteBufferFromS3::nextImpl()
{
@ -72,11 +73,17 @@ void WriteBufferFromS3::nextImpl()
if (!multipart_upload_id.empty() && last_part_size > minimum_upload_part_size)
{
writePart();
last_part_size = 0;
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
allocateBuffer();
}
}
/// Replaces `temporary_buffer` with a newly created Aws::StringStream and resets `last_part_size` to 0.
/// Called from the constructor and from nextImpl() after a part has been written.
void WriteBufferFromS3::allocateBuffer()
{
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
/// Make the stream throw when badbit gets set, so a broken buffer does not go unnoticed.
temporary_buffer->exceptions(std::ios::badbit);
/// Nothing has been accumulated in the new buffer yet.
last_part_size = 0;
}
void WriteBufferFromS3::finalize()
{
/// FIXME move final flush into the caller
@ -130,17 +137,26 @@ void WriteBufferFromS3::createMultipartUpload()
if (outcome.IsSuccess())
{
multipart_upload_id = outcome.GetResult().GetUploadId();
LOG_DEBUG(log, "Multipart upload has created. Upload id: {}", multipart_upload_id);
LOG_DEBUG(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id);
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
void WriteBufferFromS3::writePart()
{
if (temporary_buffer->tellp() <= 0)
auto size = temporary_buffer->tellp();
LOG_DEBUG(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Size: {}", bucket, key, multipart_upload_id, size);
if (size < 0)
throw Exception("Failed to write part. Buffer in invalid state.", ErrorCodes::S3_ERROR);
if (size == 0)
{
LOG_DEBUG(log, "Skipping writing part. Buffer is empty.");
return;
}
if (part_tags.size() == S3_WARN_MAX_PARTS)
{
@ -154,19 +170,16 @@ void WriteBufferFromS3::writePart()
req.SetKey(key);
req.SetPartNumber(part_tags.size() + 1);
req.SetUploadId(multipart_upload_id);
req.SetContentLength(temporary_buffer->tellp());
req.SetContentLength(size);
req.SetBody(temporary_buffer);
auto outcome = client_ptr->UploadPart(req);
LOG_TRACE(
log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, req.GetContentLength());
if (outcome.IsSuccess())
{
auto etag = outcome.GetResult().GetETag();
part_tags.push_back(etag);
LOG_DEBUG(log, "Writing part finished. Total parts: {}, Upload_id: {}, Etag: {}", part_tags.size(), multipart_upload_id, etag);
LOG_DEBUG(log, "Writing part finished. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Parts: {}", bucket, key, multipart_upload_id, etag, part_tags.size());
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
@ -174,7 +187,10 @@ void WriteBufferFromS3::writePart()
void WriteBufferFromS3::completeMultipartUpload()
{
LOG_DEBUG(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}", bucket, key, multipart_upload_id);
LOG_DEBUG(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size());
if (part_tags.empty())
throw Exception("Failed to complete multipart upload. No parts have uploaded", ErrorCodes::S3_ERROR);
Aws::S3::Model::CompleteMultipartUploadRequest req;
req.SetBucket(bucket);
@ -193,22 +209,30 @@ void WriteBufferFromS3::completeMultipartUpload()
auto outcome = client_ptr->CompleteMultipartUpload(req);
if (outcome.IsSuccess())
LOG_DEBUG(log, "Multipart upload has completed. Upload_id: {}", multipart_upload_id);
LOG_DEBUG(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size());
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
void WriteBufferFromS3::makeSinglepartUpload()
{
if (temporary_buffer->tellp() <= 0)
return;
auto size = temporary_buffer->tellp();
LOG_DEBUG(log, "Making single part upload. Bucket: {}, Key: {}", bucket, key);
LOG_DEBUG(log, "Making single part upload. Bucket: {}, Key: {}, Size: {}", bucket, key, size);
if (size < 0)
throw Exception("Failed to make single part upload. Buffer in invalid state", ErrorCodes::S3_ERROR);
if (size == 0)
{
LOG_DEBUG(log, "Skipping single part upload. Buffer is empty.");
return;
}
Aws::S3::Model::PutObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);
req.SetContentLength(temporary_buffer->tellp());
req.SetContentLength(size);
req.SetBody(temporary_buffer);
if (object_metadata.has_value())
req.SetMetadata(object_metadata.value());

View File

@ -69,6 +69,8 @@ public:
private:
bool finalized = false;
void allocateBuffer();
void createMultipartUpload();
void writePart();
void completeMultipartUpload();

View File

@ -41,8 +41,8 @@ String makeStringsEnum(const std::set<String> & values)
void changeIfArguments(ASTPtr & first, ASTPtr & second)
{
String first_value = first->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String second_value = second->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String first_value = first->as<ASTLiteral>()->value.get<String>();
String second_value = second->as<ASTLiteral>()->value.get<String>();
std::set<String> values;
values.insert(first_value);
@ -67,9 +67,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other)
{
std::set<String> values;
for (const auto & item : array_to->as<ASTLiteral>()->value.get<NearestFieldType<Array>>())
values.insert(item.get<NearestFieldType<String>>());
values.insert(other->as<ASTLiteral>()->value.get<NearestFieldType<String>>());
for (const auto & item : array_to->as<ASTLiteral>()->value.get<Array>())
values.insert(item.get<String>());
values.insert(other->as<ASTLiteral>()->value.get<String>());
String enum_string = makeStringsEnum(values);
@ -197,7 +197,7 @@ struct ConvertStringsToEnumMatcher
String(literal_other->value.getTypeName()) != "String")
return;
Array array_to = literal_to->value.get<NearestFieldType<Array>>();
Array array_to = literal_to->value.get<Array>();
if (array_to.size() == 0)
return;

View File

@ -823,7 +823,9 @@ private:
if (info->loading_id < min_id)
startLoading(*info, forced_to_reload, *min_id);
return false; /// wait for the next event
/// Wait for the next event if loading wasn't completed, and stop otherwise.
return (info->state_id >= min_id);
};
if (timeout == WAIT)
@ -851,9 +853,10 @@ private:
if (info.state_id >= min_id)
continue;
all_ready = false;
if (info.loading_id < min_id)
startLoading(info, forced_to_reload, *min_id);
all_ready &= (info.state_id >= min_id);
}
return all_ready;
};

View File

@ -299,7 +299,9 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
break;
}
case ASTAlterCommand::FREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::FREEZE_ALL:
case ASTAlterCommand::FREEZE_ALL: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_ALL:
{
required_access.emplace_back(AccessType::ALTER_FREEZE_PARTITION, database, table);
break;

View File

@ -290,8 +290,6 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
{
Blocks result;
// TODO: `node` may be always-false literal.
if (const auto * fn = node->as<ASTFunction>())
{
const auto dnf = analyzeFunction(fn, target_expr);
@ -350,6 +348,14 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
}
}
}
else if (const auto * literal = node->as<ASTLiteral>())
{
// Check if it's always true or false.
if (literal->value.getType() == Field::Types::UInt64 && literal->value.get<UInt64>() == 0)
return {result};
else
return {};
}
return {result};
}

View File

@ -271,6 +271,27 @@ void ASTAlterCommand::formatImpl(
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_PARTITION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
if (!with_name.empty())
{
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_ALL)
{
    /// Print the keyword and switch highlighting back off right after it, the same way the
    /// UNFREEZE PARTITION branch does; otherwise the highlight would stay active for whatever
    /// is printed next when `with_name` is empty.
    settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE" << (settings.hilite ? hilite_none : "");

    /// Optional backup name: WITH NAME 'name'.
    if (!with_name.empty())
    {
        settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
                      << " " << DB::quote << with_name;
    }
}
else if (type == ASTAlterCommand::DELETE)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : "");
@ -368,7 +389,8 @@ bool ASTAlterQuery::isSettingsAlter() const
bool ASTAlterQuery::isFreezeAlter() const
{
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL);
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL)
|| isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_ALL);
}
/** Get the text that identifies this element. */

View File

@ -54,6 +54,8 @@ public:
FETCH_PARTITION,
FREEZE_PARTITION,
FREEZE_ALL,
UNFREEZE_PARTITION,
UNFREEZE_ALL,
DELETE,
UPDATE,
@ -153,7 +155,9 @@ public:
*/
String from;
/** For FREEZE PARTITION - place local backup to directory with specified name.
/**
* For FREEZE PARTITION - place local backup to directory with specified name.
* For UNFREEZE - delete local backup at directory with specified name.
*/
String with_name;

View File

@ -63,6 +63,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_fetch_partition("FETCH PARTITION");
ParserKeyword s_replace_partition("REPLACE PARTITION");
ParserKeyword s_freeze("FREEZE");
ParserKeyword s_unfreeze("UNFREEZE");
ParserKeyword s_partition("PARTITION");
ParserKeyword s_first("FIRST");
@ -454,6 +455,37 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
}
else if (s_unfreeze.ignore(pos, expected))
{
if (s_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
command->type = ASTAlterCommand::UNFREEZE_PARTITION;
}
else
{
command->type = ASTAlterCommand::UNFREEZE_ALL;
}
/// WITH NAME 'name' - remove the local backup stored in the directory with the specified name
if (s_with.ignore(pos, expected))
{
if (!s_name.ignore(pos, expected))
return false;
ASTPtr ast_with_name;
if (!parser_string_literal.parse(pos, ast_with_name, expected))
return false;
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
else
{
return false;
}
}
else if (s_modify_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))

View File

@ -722,11 +722,11 @@ void TCPHandler::processTablesStatusRequest()
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (settings.sleep_in_send_tables_status)
if (settings.sleep_in_send_tables_status_ms.totalMilliseconds())
{
out->next();
std::chrono::seconds sec(settings.sleep_in_send_tables_status);
std::this_thread::sleep_for(sec);
std::chrono::milliseconds ms(settings.sleep_in_send_tables_status_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
response.write(*out, client_tcp_protocol_version);
@ -1415,11 +1415,11 @@ void TCPHandler::sendData(const Block & block)
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (block.rows() > 0 && settings.sleep_in_send_data)
if (block.rows() > 0 && settings.sleep_in_send_data_ms.totalMilliseconds())
{
out->next();
std::chrono::seconds sec(settings.sleep_in_send_data);
std::this_thread::sleep_for(sec);
std::chrono::milliseconds ms(settings.sleep_in_send_data_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
state.block_out->write(block);

View File

@ -1013,7 +1013,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_
}
volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
relative_path = new_relative_path;
SyncGuardPtr sync_guard;
@ -1065,7 +1065,7 @@ void IMergeTreeDataPart::remove(bool keep_s3) const
try
{
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
}
catch (const Poco::FileNotFoundException &)
{

View File

@ -500,7 +500,7 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa
else
{
/// There is more than one DateTime column in partition key and we don't know which one to choose.
minmax_idx_time_column_pos = -1;
minmax_idx_time_column_pos = -1;
}
}
}
@ -1849,11 +1849,6 @@ void MergeTreeData::changeSettings(
}
}
PartitionCommandsResultInfo MergeTreeData::freezeAll(const String & with_name, const StorageMetadataPtr & metadata_snapshot, const Context & context, TableLockHolder &)
{
return freezePartitionsByMatcher([] (const DataPartPtr &) { return true; }, metadata_snapshot, with_name, context);
}
void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name)
{
old_and_new_names.push_back({old_name, new_name});
@ -2690,44 +2685,6 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part
}
}
PartitionCommandsResultInfo MergeTreeData::freezePartition(const ASTPtr & partition_ast, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder &)
{
std::optional<String> prefix;
String partition_id;
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
/// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
if (const auto * partition_lit = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>())
prefix = partition_lit->value.getType() == Field::Types::UInt64
? toString(partition_lit->value.get<UInt64>())
: partition_lit->value.safeGet<String>();
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
}
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
if (prefix)
LOG_DEBUG(log, "Freezing parts with prefix {}", *prefix);
else
LOG_DEBUG(log, "Freezing parts with partition ID {}", partition_id);
return freezePartitionsByMatcher(
[&prefix, &partition_id](const DataPartPtr & part)
{
if (prefix)
return startsWith(part->info.partition_id, *prefix);
else
return part->info.partition_id == partition_id;
},
metadata_snapshot,
with_name,
context);
}
void MergeTreeData::checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings) const
{
for (const auto & command : commands)
@ -2957,6 +2914,21 @@ Pipe MergeTreeData::alterPartition(
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezePartition(command.partition, command.with_name, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezeAll(command.with_name, query_context, lock);
}
break;
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
@ -3719,7 +3691,60 @@ MergeTreeData::PathsWithDisks MergeTreeData::getRelativeDataPathsWithDisks() con
return res;
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context)
/// Builds a predicate over partition IDs from the partition expression of a query.
/// For tables whose format version precedes custom partitioning, a literal partition value
/// is matched as a prefix of the partition ID; in every other case the partition ID obtained
/// from getPartitionIDFromQuery() must match exactly.
MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & partition_ast, const Context & context) const
{
    /// Month-partitioning specific - the partition value can represent a prefix of the partition ID.
    const ASTLiteral * prefix_literal = nullptr;
    if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
        prefix_literal = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>();

    const bool prefixed = (prefix_literal != nullptr);

    String id;
    if (prefixed)
    {
        /// The literal may be written either as a number or as a string.
        if (prefix_literal->value.getType() == Field::Types::UInt64)
            id = toString(prefix_literal->value.get<UInt64>());
        else
            id = prefix_literal->value.safeGet<String>();
    }
    else
    {
        id = getPartitionIDFromQuery(partition_ast, context);
    }

    return [prefixed, id](const String & partition_id)
    {
        return prefixed ? startsWith(partition_id, id) : (id == partition_id);
    };
}
/// Freezes the parts whose partition ID is selected by `partition_ast`.
/// The table lock holder is accepted but not used in the body.
PartitionCommandsResultInfo MergeTreeData::freezePartition(
    const ASTPtr & partition_ast,
    const StorageMetadataPtr & metadata_snapshot,
    const String & with_name,
    const Context & context,
    TableLockHolder &)
{
    MatcherFn partition_matcher = getPartitionMatcher(partition_ast, context);
    return freezePartitionsByMatcher(partition_matcher, metadata_snapshot, with_name, context);
}
/// Freezes all parts: delegates to freezePartitionsByMatcher() with a matcher that accepts every partition ID.
/// The table lock holder is accepted but not used in the body.
PartitionCommandsResultInfo MergeTreeData::freezeAll(
    const String & with_name,
    const StorageMetadataPtr & metadata_snapshot,
    const Context & context,
    TableLockHolder &)
{
    const auto accept_any_partition = [] (const String &) { return true; };
    return freezePartitionsByMatcher(accept_any_partition, metadata_snapshot, with_name, context);
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
MatcherFn matcher,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context)
{
String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString();
String default_shadow_path = clickhouse_path + "shadow/";
@ -3742,7 +3767,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
size_t parts_processed = 0;
for (const auto & part : data_parts)
{
if (!matcher(part))
if (!matcher(part->info.partition_id))
continue;
LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
@ -3772,6 +3797,70 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
return result;
}
/// Removes the backup named `backup_name` for the parts whose partition ID is selected by `partition`.
/// The table lock holder is accepted but not used in the body.
PartitionCommandsResultInfo MergeTreeData::unfreezePartition(
    const ASTPtr & partition,
    const String & backup_name,
    const Context & context,
    TableLockHolder &)
{
    MatcherFn partition_matcher = getPartitionMatcher(partition, context);
    return unfreezePartitionsByMatcher(partition_matcher, backup_name, context);
}
/// Unfreezes all parts of the backup named `backup_name`: delegates to
/// unfreezePartitionsByMatcher() with a matcher that accepts every partition ID.
/// The table lock holder argument is not used here.
PartitionCommandsResultInfo MergeTreeData::unfreezeAll(
    const String & backup_name,
    const Context & context,
    TableLockHolder &)
{
    const auto accept_any_partition = [](const String &) { return true; };
    return unfreezePartitionsByMatcher(accept_any_partition, backup_name, context);
}
/// Common part for unfreezePartition() and unfreezeAll().
/// For every disk of the storage policy, looks into `shadow/<escaped backup name>/<relative data path>`
/// and recursively removes each directory entry whose partition ID is accepted by `matcher`.
/// Returns one result record per removed entry. The Context argument is not used.
PartitionCommandsResultInfo MergeTreeData::unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, const Context &)
{
    auto shadow_table_path = std::filesystem::path("shadow") / escapeForFileName(backup_name) / relative_data_path;

    LOG_DEBUG(log, "Unfreezing parts by path {}", shadow_table_path.generic_string());

    PartitionCommandsResultInfo result;

    for (const auto & disk : getStoragePolicy()->getDisks())
    {
        /// Only disks that actually contain this backup need to be scanned.
        if (disk->exists(shadow_table_path))
        {
            for (auto dir_it = disk->iterateDirectory(shadow_table_path); dir_it->isValid(); dir_it->next())
            {
                const auto & part_dir_name = dir_it->name();

                /// Partition ID is prefix of part directory name: <partition id>_<rest of part directory name>
                const auto separator_pos = part_dir_name.find('_');

                /// Entries without a '_' separator are skipped.
                if (separator_pos == std::string::npos)
                    continue;

                auto partition_id = part_dir_name.substr(0, separator_pos);

                if (!matcher(partition_id))
                    continue;

                const auto & part_path = dir_it->path();

                /// The entry is removed first; the result record and log line describe what was removed.
                disk->removeRecursive(part_path);

                result.push_back(PartitionCommandResultInfo{
                    .partition_id = partition_id,
                    .part_name = part_dir_name,
                    .backup_path = disk->getPath() + shadow_table_path.generic_string(),
                    .part_backup_path = disk->getPath() + part_path,
                    .backup_name = backup_name,
                });

                LOG_DEBUG(log, "Unfreezed part by path {}", disk->getPath() + part_path);
            }
        }
    }

    LOG_DEBUG(log, "Unfreezed {} parts", result.size());

    return result;
}
bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const
{
const auto settings = getSettings();

View File

@ -544,13 +544,6 @@ public:
const ASTPtr & new_settings,
TableLockHolder & table_lock_holder);
/// Freezes all parts.
PartitionCommandsResultInfo freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder & table_lock_holder);
/// Should be called if part data is suspected to be corrupted.
void reportBrokenPart(const String & name) const
{
@ -568,8 +561,32 @@ public:
* Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number,
* or if 'with_name' is specified - backup is created in directory with specified name.
*/
PartitionCommandsResultInfo freezePartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder & table_lock_holder);
PartitionCommandsResultInfo freezePartition(
const ASTPtr & partition,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context,
TableLockHolder & table_lock_holder);
/// Freezes all parts.
PartitionCommandsResultInfo freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder & table_lock_holder);
/// Unfreezes particular partition.
PartitionCommandsResultInfo unfreezePartition(
const ASTPtr & partition,
const String & backup_name,
const Context & context,
TableLockHolder & table_lock_holder);
/// Unfreezes all parts.
PartitionCommandsResultInfo unfreezeAll(
const String & backup_name,
const Context & context,
TableLockHolder & table_lock_holder);
public:
/// Moves partition to specified Disk
@ -939,8 +956,9 @@ protected:
bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const;
/// Common part for |freezePartition()| and |freezeAll()|.
using MatcherFn = std::function<bool(const DataPartPtr &)>;
using MatcherFn = std::function<bool(const String &)>;
PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context);
PartitionCommandsResultInfo unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, const Context & context);
// Partition helpers
bool canReplacePartition(const DataPartPtr & src_part) const;
@ -1024,6 +1042,9 @@ private:
// Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable.
mutable std::set<String> query_id_set;
mutable std::mutex query_id_set_mutex;
// Get partition matcher for FREEZE / UNFREEZE queries.
MatcherFn getPartitionMatcher(const ASTPtr & partition, const Context & context) const;
};
/// RAII struct to record big parts that are submerging or emerging.

View File

@ -94,10 +94,25 @@ std::optional<PartitionCommand> PartitionCommand::parse(const ASTAlterCommand *
}
else if (command_ast->type == ASTAlterCommand::FREEZE_ALL)
{
PartitionCommand command;
command.type = PartitionCommand::FREEZE_ALL_PARTITIONS;
command.with_name = command_ast->with_name;
return command;
PartitionCommand res;
res.type = PartitionCommand::FREEZE_ALL_PARTITIONS;
res.with_name = command_ast->with_name;
return res;
}
else if (command_ast->type == ASTAlterCommand::UNFREEZE_PARTITION)
{
PartitionCommand res;
res.type = PartitionCommand::UNFREEZE_PARTITION;
res.partition = command_ast->partition;
res.with_name = command_ast->with_name;
return res;
}
else if (command_ast->type == ASTAlterCommand::UNFREEZE_ALL)
{
PartitionCommand res;
res.type = PartitionCommand::UNFREEZE_ALL_PARTITIONS;
res.with_name = command_ast->with_name;
return res;
}
else
return {};
@ -130,6 +145,10 @@ std::string PartitionCommand::typeToString() const
return "FREEZE ALL";
case PartitionCommand::Type::FREEZE_PARTITION:
return "FREEZE PARTITION";
case PartitionCommand::Type::UNFREEZE_PARTITION:
return "UNFREEZE PARTITION";
case PartitionCommand::Type::UNFREEZE_ALL_PARTITIONS:
return "UNFREEZE ALL";
case PartitionCommand::Type::REPLACE_PARTITION:
return "REPLACE PARTITION";
}

View File

@ -27,6 +27,8 @@ struct PartitionCommand
FETCH_PARTITION,
FREEZE_ALL_PARTITIONS,
FREEZE_PARTITION,
UNFREEZE_ALL_PARTITIONS,
UNFREEZE_PARTITION,
REPLACE_PARTITION,
};
@ -52,7 +54,7 @@ struct PartitionCommand
/// For FETCH PARTITION - path in ZK to the shard, from which to download the partition.
String from_zookeeper_path;
/// For FREEZE PARTITION
/// For FREEZE PARTITION and UNFREEZE
String with_name;
enum MoveDestinationType

View File

@ -1389,7 +1389,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const
DataPartsLock lock(mutex);
for (MutableDataPartPtr & part : dst_parts)
dest_table_storage->renameTempPartAndReplace(part, &increment, &transaction, lock);
dest_table_storage->renameTempPartAndReplace(part, &dest_table_storage->increment, &transaction, lock);
removePartsFromWorkingSet(src_parts, true, lock);
transaction.commit(&lock);

View File

@ -0,0 +1,4 @@
<!-- Server configuration fragment for model evaluation: sets the CatBoost library path and the models configuration path. -->
<yandex>
<!-- Filesystem path of the CatBoost shared library. -->
<catboost_dynamic_library_path>/etc/clickhouse-server/model/libcatboostmodel.so</catboost_dynamic_library_path>
<!-- Filesystem path of the XML file that describes the models (presumably the accompanying model_config.xml; verify). -->
<models_config>/etc/clickhouse-server/model/model_config.xml</models_config>
</yandex>

View File

@ -0,0 +1,8 @@
<!-- Model definitions: a single CatBoost model named 'titanic'. -->
<models>
<model>
<!-- Model kind. -->
<type>catboost</type>
<!-- Model name; the integration test refers to it as modelEvaluate('titanic', ...). -->
<name>titanic</name>
<!-- Filesystem path of the model file. -->
<path>/etc/clickhouse-server/model/model.bin</path>
<!-- NOTE(review): 0 presumably disables periodic reloading of the model; confirm against the server documentation. -->
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,36 @@
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True, main_configs=['config/models_config.xml'])
def copy_file_to_container(local_path, dist_path, container_id):
    """Copy `local_path` from the host into container `container_id` at `dist_path` using `docker cp`.

    The command is executed as an argument list (no shell), so paths containing spaces or
    shell metacharacters are passed through verbatim. A non-zero exit status of `docker cp`
    raises `subprocess.CalledProcessError` instead of being silently ignored, so a failed
    copy is reported at the point of failure.
    """
    import subprocess  # function-scope import keeps the block self-contained

    destination = "{cont_id}:{dist}".format(cont_id=container_id, dist=dist_path)
    subprocess.check_call(["docker", "cp", local_path, destination])
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, copy the model files into the node, restart the server.

    Yields the started cluster; the cluster is shut down afterwards even if setup failed.
    """
    try:
        cluster.start()

        # Contents of the local 'model' directory go to the path referenced by the server config.
        local_model_dir = os.path.join(SCRIPT_DIR, 'model/.')
        copy_file_to_container(local_model_dir, '/etc/clickhouse-server/model', node.docker_id)

        # Restart after the files have been copied into the container.
        node.restart_clickhouse()

        yield cluster
    finally:
        cluster.shutdown()
def test(started_cluster):
    """Smoke test: evaluating the 'titanic' model with 11 numeric arguments must not raise."""
    model_query = "select modelEvaluate('titanic', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);"
    node.query(model_query)

View File

@ -15,28 +15,30 @@ NODES = {'node_' + str(i): None for i in (1, 2, 3)}
NODES['node'] = None
sleep_time = 30
# Sleep time in milliseconds.
sleep_time = 30000
@pytest.fixture(scope="module")
def started_cluster():
NODES['node'] = cluster.add_instance(
'node', with_zookeeper=True, stay_alive=True, main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'])
'node', stay_alive=True, main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'])
for name in NODES:
if name != 'node':
NODES[name] = cluster.add_instance(name, with_zookeeper=True, user_configs=['configs/users1.xml'])
NODES[name] = cluster.add_instance(name, user_configs=['configs/users1.xml'])
try:
cluster.start()
for node_id, node in list(NODES.items()):
node.query('''CREATE TABLE replicated (id UInt32, date Date) ENGINE =
ReplicatedMergeTree('/clickhouse/tables/replicated', '{}') ORDER BY id PARTITION BY toYYYYMM(date)'''.format(node_id))
node.query('''CREATE TABLE test_hedged (id UInt32, date Date) ENGINE =
MergeTree() ORDER BY id PARTITION BY toYYYYMM(date)''')
node.query("INSERT INTO test_hedged select number, toDate(number) from numbers(100);")
NODES['node'].query('''CREATE TABLE distributed (id UInt32, date Date) ENGINE =
Distributed('test_cluster', 'default', 'replicated')''')
NODES['node'].query("INSERT INTO distributed select number, toDate(number) from numbers(100);")
Distributed('test_cluster', 'default', 'test_hedged')''')
yield cluster
@ -47,8 +49,8 @@ def started_cluster():
config = '''<yandex>
<profiles>
<default>
<sleep_in_send_tables_status>{sleep_in_send_tables_status}</sleep_in_send_tables_status>
<sleep_in_send_data>{sleep_in_send_data}</sleep_in_send_data>
<sleep_in_send_tables_status_ms>{sleep_in_send_tables_status_ms}</sleep_in_send_tables_status_ms>
<sleep_in_send_data_ms>{sleep_in_send_data_ms}</sleep_in_send_data_ms>
</default>
</profiles>
</yandex>'''
@ -70,12 +72,12 @@ def check_query(expected_replica, receive_timeout=300):
assert query_time < 10
def check_settings(node_name, sleep_in_send_tables_status, sleep_in_send_data):
def check_settings(node_name, sleep_in_send_tables_status_ms, sleep_in_send_data_ms):
attempts = 0
while attempts < 1000:
setting1 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status'")
setting2 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data'")
if int(setting1) == sleep_in_send_tables_status and int(setting2) == sleep_in_send_data:
setting1 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status_ms'")
setting2 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data_ms'")
if int(setting1) == sleep_in_send_tables_status_ms and int(setting2) == sleep_in_send_data_ms:
return
time.sleep(0.1)
attempts += 1
@ -88,7 +90,29 @@ def check_changing_replica_events(expected_count):
assert int(result) == expected_count
def update_configs(node_1_sleep_in_send_tables_status=0, node_1_sleep_in_send_data=0,
                   node_2_sleep_in_send_tables_status=0, node_2_sleep_in_send_data=0,
                   node_3_sleep_in_send_tables_status=0, node_3_sleep_in_send_data=0):
    """Write the sleep settings of node_1..node_3 into their users1.xml and wait until they are applied.

    Each value is substituted into the `config` template as `sleep_in_send_tables_status_ms` /
    `sleep_in_send_data_ms`; every argument defaults to 0.
    """
    per_node_settings = (
        ('node_1', node_1_sleep_in_send_tables_status, node_1_sleep_in_send_data),
        ('node_2', node_2_sleep_in_send_tables_status, node_2_sleep_in_send_data),
        ('node_3', node_3_sleep_in_send_tables_status, node_3_sleep_in_send_data),
    )

    # First rewrite the config on every node ...
    for node_name, tables_status_ms, send_data_ms in per_node_settings:
        NODES[node_name].replace_config(
            '/etc/clickhouse-server/users.d/users1.xml',
            config.format(sleep_in_send_tables_status_ms=tables_status_ms, sleep_in_send_data_ms=send_data_ms))

    # ... then check, node by node, that the new values are reported.
    for node_name, tables_status_ms, send_data_ms in per_node_settings:
        check_settings(node_name, tables_status_ms, send_data_ms)
def test_stuck_replica(started_cluster):
update_configs()
cluster.pause_container("node_1")
check_query(expected_replica="node_2")
@ -111,6 +135,8 @@ def test_stuck_replica(started_cluster):
def test_long_query(started_cluster):
update_configs()
# Restart to reset pool states.
NODES['node'].restart_clickhouse()
@ -121,169 +147,54 @@ def test_long_query(started_cluster):
def test_send_table_status_sleep(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', sleep_time, 0)
check_settings('node_2', 0, 0)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_tables_status=sleep_time)
check_query(expected_replica="node_2")
check_changing_replica_events(1)
def test_send_table_status_sleep2(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', sleep_time, 0)
check_settings('node_2', sleep_time, 0)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time)
check_query(expected_replica="node_3")
check_changing_replica_events(2)
def test_send_data(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 0, 0)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_data=sleep_time)
check_query(expected_replica="node_2")
check_changing_replica_events(1)
def test_send_data2(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 0, sleep_time)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time)
check_query(expected_replica="node_3")
check_changing_replica_events(2)
def test_combination1(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', sleep_time, 0)
check_settings('node_2', 0, sleep_time)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_data=sleep_time)
check_query(expected_replica="node_3")
check_changing_replica_events(2)
def test_combination2(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', sleep_time, 0)
check_settings('node_3', 0, 0)
update_configs(node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=sleep_time)
check_query(expected_replica="node_3")
check_changing_replica_events(2)
def test_combination3(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 1, 0)
check_settings('node_3', 0, sleep_time)
update_configs(node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_tables_status=1000,
node_3_sleep_in_send_data=sleep_time)
check_query(expected_replica="node_2")
check_changing_replica_events(3)
def test_combination4(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=2, sleep_in_send_data=0))
check_settings('node_1', 1, sleep_time)
check_settings('node_2', 1, 0)
check_settings('node_3', 2, 0)
update_configs(node_1_sleep_in_send_tables_status=1000,
node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_tables_status=1000,
node_3_sleep_in_send_tables_status=1000)
check_query(expected_replica="node_2")
check_changing_replica_events(4)
@ -291,22 +202,9 @@ def test_combination4(started_cluster):
def test_receive_timeout1(started_cluster):
# Check the situation when first two replicas get receive timeout
# in establishing connection, but the third replica is ok.
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=3, sleep_in_send_data=0))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=3, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=1))
check_settings('node_1', 3, 0)
check_settings('node_2', 3, 0)
check_settings('node_3', 0, 1)
update_configs(node_1_sleep_in_send_tables_status=3000,
node_2_sleep_in_send_tables_status=3000,
node_3_sleep_in_send_data=1000)
check_query(expected_replica="node_3", receive_timeout=2)
check_changing_replica_events(2)
@ -315,22 +213,9 @@ def test_receive_timeout2(started_cluster):
# Check the situation when first replica get receive timeout
# in packet receiving but there are replicas in process of
# connection establishing.
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=4))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=2, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=2, sleep_in_send_data=0))
check_settings('node_1', 0, 4)
check_settings('node_2', 2, 0)
check_settings('node_3', 2, 0)
update_configs(node_1_sleep_in_send_data=4000,
node_2_sleep_in_send_tables_status=2000,
node_3_sleep_in_send_tables_status=2000)
check_query(expected_replica="node_2", receive_timeout=3)
check_changing_replica_events(3)

View File

@ -14,29 +14,30 @@ cluster = ClickHouseCluster(__file__)
NODES = {'node_' + str(i): None for i in (1, 2, 3, 4)}
NODES['node'] = None
sleep_time = 30
# Sleep time in milliseconds.
sleep_time = 30000
@pytest.fixture(scope="module")
def started_cluster():
cluster = ClickHouseCluster(__file__)
NODES['node'] = cluster.add_instance(
'node', with_zookeeper=True, stay_alive=True, main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'])
'node', stay_alive=True, main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'])
for name in NODES:
if name != 'node':
NODES[name] = cluster.add_instance(name, with_zookeeper=True, user_configs=['configs/users1.xml'])
NODES[name] = cluster.add_instance(name, user_configs=['configs/users1.xml'])
try:
cluster.start()
for node_id, node in list(NODES.items()):
node.query('''CREATE TABLE replicated (id UInt32, date Date) ENGINE =
ReplicatedMergeTree('/clickhouse/tables/replicated', '{}') ORDER BY id PARTITION BY toYYYYMM(date)'''.format(node_id))
node.query('''CREATE TABLE test_hedged (id UInt32, date Date) ENGINE =
MergeTree() ORDER BY id PARTITION BY toYYYYMM(date)''')
node.query("INSERT INTO test_hedged SELECT number, toDateTime(number) FROM numbers(100)")
NODES['node'].query('''CREATE TABLE distributed (id UInt32, date Date) ENGINE =
Distributed('test_cluster', 'default', 'replicated')''')
NODES['node'].query("INSERT INTO distributed SELECT number, toDateTime(number) FROM numbers(100)")
Distributed('test_cluster', 'default', 'test_hedged')''')
yield cluster
@ -47,8 +48,8 @@ def started_cluster():
config = '''<yandex>
<profiles>
<default>
<sleep_in_send_tables_status>{sleep_in_send_tables_status}</sleep_in_send_tables_status>
<sleep_in_send_data>{sleep_in_send_data}</sleep_in_send_data>
<sleep_in_send_tables_status_ms>{sleep_in_send_tables_status_ms}</sleep_in_send_tables_status_ms>
<sleep_in_send_data_ms>{sleep_in_send_data_ms}</sleep_in_send_data_ms>
</default>
</profiles>
</yandex>'''
@ -72,12 +73,12 @@ def check_query(query=QUERY_1):
assert query_time < 5
def check_settings(node_name, sleep_in_send_tables_status, sleep_in_send_data):
def check_settings(node_name, sleep_in_send_tables_status_ms, sleep_in_send_data_ms):
attempts = 0
while attempts < 1000:
setting1 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status'")
setting2 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data'")
if int(setting1) == sleep_in_send_tables_status and int(setting2) == sleep_in_send_data:
setting1 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status_ms'")
setting2 = NODES[node_name].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data_ms'")
if int(setting1) == sleep_in_send_tables_status_ms and int(setting2) == sleep_in_send_data_ms:
return
time.sleep(0.1)
attempts += 1
@ -90,107 +91,64 @@ def check_changing_replica_events(expected_count):
assert int(result) == expected_count
def test_send_table_status_sleep(started_cluster):
def update_configs(node_1_sleep_in_send_tables_status=0, node_1_sleep_in_send_data=0,
node_2_sleep_in_send_tables_status=0, node_2_sleep_in_send_data=0,
node_3_sleep_in_send_tables_status=0, node_3_sleep_in_send_data=0,
node_4_sleep_in_send_tables_status=0, node_4_sleep_in_send_data=0):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
config.format(sleep_in_send_tables_status_ms=node_1_sleep_in_send_tables_status, sleep_in_send_data_ms=node_1_sleep_in_send_data))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=sleep_time, sleep_in_send_data=0))
check_settings('node_1', sleep_time, 0)
check_settings('node_2', sleep_time, 0)
config.format(sleep_in_send_tables_status_ms=node_2_sleep_in_send_tables_status, sleep_in_send_data_ms=node_2_sleep_in_send_data))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status_ms=node_3_sleep_in_send_tables_status, sleep_in_send_data_ms=node_3_sleep_in_send_data))
NODES['node_4'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status_ms=node_4_sleep_in_send_tables_status, sleep_in_send_data_ms=node_4_sleep_in_send_data))
check_settings('node_1', node_1_sleep_in_send_tables_status, node_1_sleep_in_send_data)
check_settings('node_2', node_2_sleep_in_send_tables_status, node_2_sleep_in_send_data)
check_settings('node_3', node_3_sleep_in_send_tables_status, node_3_sleep_in_send_data)
check_settings('node_4', node_4_sleep_in_send_tables_status, node_4_sleep_in_send_data)
def test_send_table_status_sleep(started_cluster):
update_configs(node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time)
check_query()
check_changing_replica_events(2)
def test_send_data(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 0, sleep_time)
update_configs(node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time)
check_query()
check_changing_replica_events(2)
def test_combination1(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
check_settings('node_1', 1, 0)
check_settings('node_2', 1, 0)
check_settings('node_3', 0, sleep_time)
update_configs(node_1_sleep_in_send_tables_status=1000,
node_2_sleep_in_send_tables_status=1000,
node_3_sleep_in_send_data=sleep_time)
check_query()
check_changing_replica_events(3)
def test_combination2(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_4'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=1, sleep_in_send_data=0))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 1, 0)
check_settings('node_3', 0, sleep_time)
check_settings('node_4', 1, 0)
update_configs(node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_tables_status=1000,
node_3_sleep_in_send_data=sleep_time,
node_4_sleep_in_send_tables_status=1000)
check_query()
check_changing_replica_events(4)
def test_query_with_no_data_to_sample(started_cluster):
NODES['node_1'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_2'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=sleep_time))
NODES['node_3'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
NODES['node_4'].replace_config(
'/etc/clickhouse-server/users.d/users1.xml',
config.format(sleep_in_send_tables_status=0, sleep_in_send_data=0))
check_settings('node_1', 0, sleep_time)
check_settings('node_2', 0, sleep_time)
check_settings('node_3', 0, 0)
check_settings('node_4', 0, 0)
update_configs(node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_data=sleep_time)
# When there is no way to sample data, the whole query will be performed by
# the first replica and the second replica will just send EndOfStream,

View File

@ -137,8 +137,17 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical):
list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD
node.query("SYSTEM START MERGES s3_test")
# Wait for merges and old parts deletion
time.sleep(3)
for attempt in range(0, 10):
parts_count = node.query("SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' FORMAT Values")
if parts_count == "(1)":
break
if attempt == 9:
assert parts_count == "(1)"
time.sleep(1)
assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
assert node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)"
@ -333,3 +342,28 @@ def test_move_replace_partition_to_another_table(cluster):
for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')):
minio.remove_object(cluster.minio_bucket, obj.object_name)
def test_freeze_unfreeze(cluster):
    """FREEZE two backups, TRUNCATE the table, then UNFREEZE both backups and check the objects left in S3.

    After TRUNCATE the frozen data of two parts is still expected in the bucket; after both
    UNFREEZE commands only the fixed FILES_OVERHEAD objects are expected to remain.
    """
    create_table(cluster, "s3_test")

    node = cluster.instances["node"]
    minio = cluster.minio_client

    def count_data_objects():
        # Number of objects currently stored under 'data/' in the test bucket.
        return len(list(minio.list_objects(cluster.minio_bucket, 'data/')))

    first_batch = generate_values('2020-01-03', 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(first_batch))
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup1'")

    second_batch = generate_values('2020-01-04', 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(second_batch))
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'")

    node.query("TRUNCATE TABLE s3_test")
    assert count_data_objects() == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    # Unfreeze single partition from backup1.
    node.query("ALTER TABLE s3_test UNFREEZE PARTITION '2020-01-03' WITH NAME 'backup1'")
    # Unfreeze all partitions from backup2.
    node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'")

    # Data should be removed from S3.
    assert count_data_objects() == FILES_OVERHEAD

View File

@ -29,7 +29,7 @@ ALTER MATERIALIZE TTL ['MATERIALIZE TTL'] TABLE ALTER TABLE
ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING'] TABLE ALTER TABLE
ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE
ALTER FETCH PARTITION ['FETCH PARTITION'] TABLE ALTER TABLE
ALTER FREEZE PARTITION ['FREEZE PARTITION'] TABLE ALTER TABLE
ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE
ALTER TABLE [] \N ALTER
ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW
ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW

View File

@ -16,3 +16,9 @@ ATTACH PARTITION 3 3_12_12_0 3_4_4_0
command_type partition_id part_name backup_name old_part_name
FREEZE PARTITION 7 7_8_8_0 test_01417_single_part_7
ATTACH PART 5 5_13_13_0 5_6_6_0
command_type partition_id part_name backup_name
UNFREEZE PARTITION 7 7_8_8_0 test_01417_single_part_7
command_type partition_id part_name backup_name
FREEZE PARTITION 202103 20210301_20210301_1_1_0 test_01417_single_part_old_syntax
command_type partition_id part_name backup_name
UNFREEZE PARTITION 20210301 20210301_20210301_1_1_0 test_01417_single_part_old_syntax

View File

@ -13,6 +13,11 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze;"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE table_for_freeze (key UInt64, value String) ENGINE = MergeTree() ORDER BY key PARTITION BY key % 10;"
${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze SELECT number, toString(number) from numbers(10);"
# Also create a table that uses the old (legacy) MergeTree engine syntax
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_old_syntax;"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE table_for_freeze_old_syntax (dt Date, value String) ENGINE = MergeTree(dt, (value), 8192);"
${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze_old_syntax SELECT toDate('2021-03-01'), toString(number) from numbers(10);"
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
@ -35,5 +40,21 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7'
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table"
# Unfreeze partition
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze UNFREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# Freeze partition with old syntax
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax FREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# Unfreeze partition with old syntax
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax UNFREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# teardown
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze;"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_old_syntax;"

View File

@ -1,6 +1,6 @@
AlterQuery t1 (children 1)
ExpressionList (children 1)
AlterCommand 25 (children 1)
AlterCommand 27 (children 1)
Function equals (children 1)
ExpressionList (children 2)
Identifier date

View File

@ -31,10 +31,12 @@ Map(Date, Int32)
Map(UUID, UInt16)
{'00001192-0000-4000-8000-000000000001':1,'00001192-0000-4000-7000-000000000001':2}
0 2 1
Map(Int128, Int32)
Map(Int128, String)
{-1:'a',0:'b',1234567898765432193024000:'c',-1234567898765432193024000:'d'}
a b c d
a
b
a b c d
b b b b

View File

@ -11,7 +11,7 @@ SELECT 'Map(Int8, Int8)';
SELECT m FROM table_map_with_key_integer;
SELECT m[127], m[1], m[0], m[-1] FROM table_map_with_key_integer;
SELECT m[toInt8(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(5) AS number;
SELECT m[toInt8(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3, 4] AS number;
SELECT count() FROM table_map_with_key_integer WHERE m = map();
@ -26,7 +26,7 @@ SELECT 'Map(Int32, UInt16)';
SELECT m FROM table_map_with_key_integer;
SELECT m[-1], m[2147483647], m[-2147483648] FROM table_map_with_key_integer;
SELECT m[toInt32(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(5) AS number;
SELECT m[toInt32(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3, 4] AS number;
DROP TABLE IF EXISTS table_map_with_key_integer;
@ -39,7 +39,7 @@ SELECT 'Map(Date, Int32)';
SELECT m FROM table_map_with_key_integer;
SELECT m[toDate('2020-01-01')], m[toDate('2020-01-02')], m[toDate('2020-01-03')] FROM table_map_with_key_integer;
SELECT m[toDate(number)] FROM table_map_with_key_integer ARRAY JOIN range(3) AS number;
SELECT m[toDate(number)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2] AS number;
DROP TABLE IF EXISTS table_map_with_key_integer;
@ -51,12 +51,14 @@ INSERT INTO table_map_with_key_integer VALUES ('2020-01-01', map('00001192-0000-
SELECT 'Map(UUID, UInt16)';
SELECT m FROM table_map_with_key_integer;
SELECT
m[toUUID('00001192-0000-4000-6000-000000000001')],
m[toUUID('00001192-0000-4000-7000-000000000001')],
SELECT
m[toUUID('00001192-0000-4000-6000-000000000001')],
m[toUUID('00001192-0000-4000-7000-000000000001')],
m[toUUID('00001192-0000-4000-8000-000000000001')]
FROM table_map_with_key_integer;
SELECT m[257], m[1] FROM table_map_with_key_integer; -- { serverError 43 }
DROP TABLE IF EXISTS table_map_with_key_integer;
CREATE TABLE table_map_with_key_integer (d DATE, m Map(Int128, String))
@ -65,11 +67,14 @@ ENGINE = MergeTree() ORDER BY d;
INSERT INTO table_map_with_key_integer SELECT '2020-01-01', map(-1, 'a', 0, 'b', toInt128(1234567898765432123456789), 'c', toInt128(-1234567898765432123456789), 'd');
SELECT 'Map(Int128, Int32)';
SELECT 'Map(Int128, String)';
SELECT m FROM table_map_with_key_integer;
SELECT m[toInt128(-1)], m[toInt128(0)], m[toInt128(1234567898765432123456789)], m[toInt128(-1234567898765432123456789)] FROM table_map_with_key_integer;
SELECT m[toInt128(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(4) AS number;
SELECT m[toInt128(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3] AS number;
SELECT m[-1], m[0], m[toInt128(1234567898765432123456789)], m[toInt128(-1234567898765432123456789)] FROM table_map_with_key_integer;
SELECT m[toUInt64(0)], m[toInt64(0)], m[toUInt8(0)], m[toUInt16(0)] FROM table_map_with_key_integer;
DROP TABLE IF EXISTS table_map_with_key_integer;

View File

@ -0,0 +1,2 @@
1
1

View File

@ -0,0 +1,14 @@
-- Constant WHERE condition on a Distributed table, with shard skipping
-- enabled and early constant folding disabled for the query.
SET optimize_skip_unused_shards = 1;

DROP TABLE IF EXISTS data_01755;
DROP TABLE IF EXISTS dist_01755;

CREATE TABLE data_01755 (i Int) ENGINE = Memory;

-- Distributed wrapper over the two-shard test cluster; column i is the sharding key.
CREATE TABLE dist_01755 AS data_01755
ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), data_01755, i);

INSERT INTO data_01755 (i) VALUES (1);

-- The query shape (asterisk plus a constant condition) is what is being
-- exercised, so it is intentionally left as written.
-- NOTE(review): presumably both shards resolve to the local server, so the
-- single row is expected once per shard; confirm against the .reference file.
SELECT *
FROM dist_01755
WHERE 1
SETTINGS enable_early_constant_folding = 0;

DROP TABLE IF EXISTS data_01755;
DROP TABLE IF EXISTS dist_01755;

View File

@ -0,0 +1 @@
1

View File

@ -0,0 +1 @@
-- NULL is cast to Nullable(String), then to a Nullable(Enum8); the Nullable
-- wrapper is stripped with assumeNotNull and the result converted to UInt8.
-- NOTE(review): presumably the expected output is the enum's only value, 1;
-- confirm against the .reference file.
SELECT
    toUInt8(
        assumeNotNull(
            cast(
                cast(NULL, 'Nullable(String)'),
                'Nullable(Enum8(\'Hello\' = 1))'
            )
        )
    );

View File

@ -0,0 +1,4 @@
1 1 1_1_1_0
1 2 1_2_2_0
1 3 1_3_3_0
1 4 1_4_4_0

View File

@ -0,0 +1,20 @@
-- MOVE PARTITION into a destination table that already holds several parts
-- of the same partition; the part names in the destination are then listed.
DROP TABLE IF EXISTS t_src;
DROP TABLE IF EXISTS t_dst;

CREATE TABLE t_src
(
    id UInt32,
    v UInt32
)
ENGINE = MergeTree
PARTITION BY id
ORDER BY id;

CREATE TABLE t_dst
(
    id UInt32,
    v UInt32
)
ENGINE = MergeTree
PARTITION BY id
ORDER BY id;

-- Merges are stopped on both tables before any data is inserted.
-- NOTE(review): presumably so that every insert stays a separate part with a
-- stable name; confirm.
SYSTEM STOP MERGES t_src;
SYSTEM STOP MERGES t_dst;

-- Three separate inserts into the destination (one statement each, on purpose),
-- followed by a single insert into the source. All rows are in partition 1.
INSERT INTO t_dst (id, v) VALUES (1, 1);
INSERT INTO t_dst (id, v) VALUES (1, 2);
INSERT INTO t_dst (id, v) VALUES (1, 3);

INSERT INTO t_src (id, v) VALUES (1, 4);

ALTER TABLE t_src MOVE PARTITION 1 TO TABLE t_dst;

-- v is unique per row, so ordering by it makes the output deterministic.
SELECT
    id,
    v,
    _part
FROM t_dst
ORDER BY v;

DROP TABLE t_src;
DROP TABLE t_dst;

View File

@ -0,0 +1,2 @@
2021-03-22 23:15:11
2020-03-21 23:00:00

View File

@ -0,0 +1,2 @@
-- Convert a UTC timestamp to the Asia/Tehran time zone.
SELECT
    toTimeZone(
        toDateTime('2021-03-22 18:45:11', 'UTC'),
        'Asia/Tehran'
    );

-- Parse a wall-clock string directly in the Asia/Tehran time zone.
-- NOTE(review): both dates look chosen to sit near a DST change in that zone; confirm.
SELECT
    toDateTime('2020-03-21 23:00:00', 'Asia/Tehran');

View File

@ -0,0 +1 @@
2021-03-22 00:00:00.000

View File

@ -0,0 +1 @@
-- Build a DateTime64 with scale 3 from a string that contains only a date.
SELECT
    toDateTime64('2021-03-22', 3);

View File

@ -0,0 +1,3 @@
1968
-473
1990-01-01

View File

@ -0,0 +1,4 @@
-- Date/time functions applied to DateTime64 values, including dates before 1970.
SELECT
    toYear(toDateTime64('1968-12-12 11:22:33', 0, 'UTC'));

SELECT
    toInt16(
        toRelativeWeekNum(toDateTime64('1960-11-30 18:00:11.999', 3, 'UTC'))
    );

SELECT
    toStartOfQuarter(toDateTime64('1990-01-04 12:14:12', 0, 'UTC'));

-- This statement carries a test hint expecting server error 407; the hint is
-- kept on the statement's final line, right after the semicolon.
SELECT
    toUnixTimestamp(toDateTime64('1900-12-12 11:22:33', 0, 'UTC')); -- { serverError 407 }

Some files were not shown because too many files have changed in this diff Show More