Merge remote-tracking branch 'origin/master' into HEAD

This commit is contained in:
Alexander Kuzmenkov 2021-03-23 17:40:13 +03:00
commit c9facd631c
84 changed files with 1714 additions and 561 deletions

View File

@ -8,7 +8,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.

View File

@ -1,4 +1,5 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions
etc/security/limits.d/clickhouse.conf

View File

@ -0,0 +1,284 @@
# CMake in ClickHouse
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
```cmake
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake files types
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`)
## List of CMake flags
* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py).
* The flag name is a link to its position in the code.
* If an option's default value is itself an option, it's also a link to its position in this list.
### ClickHouse modes
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-clickhouse-all"></a>[`ENABLE_CLICKHOUSE_ALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L8) | `ON` | Enable all ClickHouse modes by default | The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
| <a name="enable-clickhouse-benchmark"></a>[`ENABLE_CLICKHOUSE_BENCHMARK`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L18) | `ENABLE_CLICKHOUSE_ALL` | Queries benchmarking mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/ |
| <a name="enable-clickhouse-client"></a>[`ENABLE_CLICKHOUSE_CLIENT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L11) | `ENABLE_CLICKHOUSE_ALL` | Client mode (interactive tui/shell that connects to the server) | |
| <a name="enable-clickhouse-compressor"></a>[`ENABLE_CLICKHOUSE_COMPRESSOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L23) | `ENABLE_CLICKHOUSE_ALL` | Data compressor and decompressor | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/ |
| <a name="enable-clickhouse-copier"></a>[`ENABLE_CLICKHOUSE_COPIER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L26) | `ENABLE_CLICKHOUSE_ALL` | Inter-cluster data copying mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/ |
| <a name="enable-clickhouse-extract-from-config"></a>[`ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20) | `ENABLE_CLICKHOUSE_ALL` | Configs processor (extract values etc.) | |
| <a name="enable-clickhouse-format"></a>[`ENABLE_CLICKHOUSE_FORMAT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28) | `ENABLE_CLICKHOUSE_ALL` | Queries pretty-printer and formatter with syntax highlighting | |
| <a name="enable-clickhouse-git-import"></a>[`ENABLE_CLICKHOUSE_GIT_IMPORT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L40) | `ENABLE_CLICKHOUSE_ALL` | A tool to analyze Git repositories | https://presentations.clickhouse.tech/matemarketing_2020/ |
| <a name="enable-clickhouse-install"></a>[`ENABLE_CLICKHOUSE_INSTALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L44) | `OFF` | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
| <a name="enable-clickhouse-local"></a>[`ENABLE_CLICKHOUSE_LOCAL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L15) | `ENABLE_CLICKHOUSE_ALL` | Local files fast processing mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/ |
| <a name="enable-clickhouse-obfuscator"></a>[`ENABLE_CLICKHOUSE_OBFUSCATOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L32) | `ENABLE_CLICKHOUSE_ALL` | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/ |
| <a name="enable-clickhouse-odbc-bridge"></a>[`ENABLE_CLICKHOUSE_ODBC_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L36) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to ODBC driver | https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/ |
| <a name="enable-clickhouse-server"></a>[`ENABLE_CLICKHOUSE_SERVER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10) | `ENABLE_CLICKHOUSE_ALL` | Server mode (main mode) | |
### External libraries
Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-amqpcpp"></a>[`ENABLE_AMQPCPP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/amqpcpp.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe AMQP-CPP | |
| <a name="enable-avro"></a>[`ENABLE_AVRO`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L2) | `ENABLE_LIBRARIES` | Enable Avro | Needed when using Apache Avro serialization format |
| <a name="enable-base"></a>[`ENABLE_BASE64`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/base64.cmake#L1) | `ENABLE_LIBRARIES` | Enable base64 | |
| <a name="enable-brotli"></a>[`ENABLE_BROTLI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L1) | `ENABLE_LIBRARIES` | Enable brotli | |
| <a name="enable-capnp"></a>[`ENABLE_CAPNP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cap'n Proto | |
| <a name="enable-cassandra"></a>[`ENABLE_CASSANDRA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cassandra.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cassandra | |
| <a name="enable-ccache"></a>[`ENABLE_CCACHE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ccache.cmake#L22) | `ENABLE_CCACHE_BY_DEFAULT` | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
| <a name="enable-clang-tidy"></a>[`ENABLE_CLANG_TIDY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2) | `OFF` | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
| <a name="enable-curl"></a>[`ENABLE_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L1) | `ENABLE_LIBRARIES` | Enable curl | |
| <a name="enable-embedded-compiler"></a>[`ENABLE_EMBEDDED_COMPILER`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L5) | `ENABLE_LIBRARIES` | Set to TRUE to enable support for 'compile_expressions' option for query execution | |
| <a name="enable-fastops"></a>[`ENABLE_FASTOPS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/fastops.cmake#L2) | `ENABLE_LIBRARIES` | Enable fast vectorized mathematical functions library by Mikhail Parakhin | |
| <a name="enable-gperf"></a>[`ENABLE_GPERF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gperf.cmake#L5) | `ENABLE_LIBRARIES` | Use gperf function hash generator tool | |
| <a name="enable-grpc"></a>[`ENABLE_GRPC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L8) | `ENABLE_GRPC_DEFAULT` | Use gRPC | |
| <a name="enable-gsasl-library"></a>[`ENABLE_GSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L1) | `ENABLE_LIBRARIES` | Enable gsasl library | |
| <a name="enable-h"></a>[`ENABLE_H3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L1) | `ENABLE_LIBRARIES` | Enable H3 | |
| <a name="enable-hdfs"></a>[`ENABLE_HDFS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L2) | `ENABLE_LIBRARIES` | Enable HDFS | |
| <a name="enable-icu"></a>[`ENABLE_ICU`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L2) | `ENABLE_LIBRARIES` | Enable ICU | |
| <a name="enable-ldap"></a>[`ENABLE_LDAP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L5) | `ENABLE_LIBRARIES` | Enable LDAP | |
| <a name="enable-libpqxx"></a>[`ENABLE_LIBPQXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libpqxx.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe libpqxx | |
| <a name="enable-msgpack"></a>[`ENABLE_MSGPACK`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L1) | `ENABLE_LIBRARIES` | Enable msgpack library | |
| <a name="enable-mysql"></a>[`ENABLE_MYSQL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L2) | `ENABLE_LIBRARIES` | Enable MySQL | |
| <a name="enable-nuraft"></a>[`ENABLE_NURAFT`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/nuraft.cmake#L1) | `ENABLE_LIBRARIES` | Enable NuRaft | |
| <a name="enable-odbc"></a>[`ENABLE_ODBC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ODBC library | |
| <a name="enable-orc"></a>[`ENABLE_ORC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ORC | |
| <a name="enable-parquet"></a>[`ENABLE_PARQUET`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L2) | `ENABLE_LIBRARIES` | Enable parquet | |
| <a name="enable-protobuf"></a>[`ENABLE_PROTOBUF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L1) | `ENABLE_LIBRARIES` | Enable protobuf | |
| <a name="enable-rapidjson"></a>[`ENABLE_RAPIDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L1) | `ENABLE_LIBRARIES` | Use rapidjson | |
| <a name="enable-rdkafka"></a>[`ENABLE_RDKAFKA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L1) | `ENABLE_LIBRARIES` | Enable kafka | |
| <a name="enable-rocksdb"></a>[`ENABLE_ROCKSDB`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L1) | `ENABLE_LIBRARIES` | Enable ROCKSDB | |
| <a name="enable-s"></a>[`ENABLE_S3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L2) | `ENABLE_LIBRARIES` | Enable S3 | |
| <a name="enable-ssl"></a>[`ENABLE_SSL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L3) | `ENABLE_LIBRARIES` | Enable ssl | Needed when securely connecting to an external server, e.g. clickhouse-client --host ... --secure |
| <a name="enable-stats"></a>[`ENABLE_STATS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/stats.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe StatsLib library | |
### External libraries system/bundled mode
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="use-internal-avro-library"></a>[`USE_INTERNAL_AVRO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L11) | `ON` | Set to FALSE to use system avro library instead of bundled | |
| <a name="use-internal-aws-s-library"></a>[`USE_INTERNAL_AWS_S3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L14) | `ON` | Set to FALSE to use system S3 instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-brotli-library"></a>[`USE_INTERNAL_BROTLI_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libbrotli library instead of bundled | Many system ship only dynamic brotly libraries, so we back off to bundled by default |
| <a name="use-internal-capnp-library"></a>[`USE_INTERNAL_CAPNP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system capnproto library instead of bundled | |
| <a name="use-internal-curl"></a>[`USE_INTERNAL_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L10) | `NOT_UNBUNDLED` | Use internal curl library | |
| <a name="use-internal-grpc-library"></a>[`USE_INTERNAL_GRPC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L25) | `NOT_UNBUNDLED` | Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal gRPC framework. You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. The external gRPC framework can be installed in the system by running sudo apt-get install libgrpc++-dev protobuf-compiler-grpc |
| <a name="use-internal-gtest-library"></a>[`USE_INTERNAL_GTEST_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gtest.cmake#L3) | `NOT_UNBUNDLED` | Set to FALSE to use system Google Test instead of bundled | |
| <a name="use-internal-h-library"></a>[`USE_INTERNAL_H3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L9) | `ON` | Set to FALSE to use system h3 library instead of bundled | |
| <a name="use-internal-hdfs-library"></a>[`USE_INTERNAL_HDFS3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L14) | `ON` | Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk) | |
| <a name="use-internal-icu-library"></a>[`USE_INTERNAL_ICU_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system ICU library instead of bundled | |
| <a name="use-internal-ldap-library"></a>[`USE_INTERNAL_LDAP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system *LDAP library instead of bundled | |
| <a name="use-internal-libcxx-library"></a>[`USE_INTERNAL_LIBCXX_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L15) | `USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT` | Disable to use system libcxx and libcxxabi libraries instead of bundled | |
| <a name="use-internal-libgsasl-library"></a>[`USE_INTERNAL_LIBGSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libgsasl library instead of bundled | when USE_STATIC_LIBRARIES we usually need to pick up hell a lot of dependencies for libgsasl |
| <a name="use-internal-libxml-library"></a>[`USE_INTERNAL_LIBXML2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libxml2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system libxml2 library instead of bundled | |
| <a name="use-internal-llvm-library"></a>[`USE_INTERNAL_LLVM_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L8) | `NOT_UNBUNDLED` | Use bundled or system LLVM library. | |
| <a name="use-internal-msgpack-library"></a>[`USE_INTERNAL_MSGPACK_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system msgpack library instead of bundled | |
| <a name="use-internal-mysql-library"></a>[`USE_INTERNAL_MYSQL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system mysqlclient library instead of bundled | |
| <a name="use-internal-odbc-library"></a>[`USE_INTERNAL_ODBC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L22) | `NOT_UNBUNDLED` | Use internal ODBC library | |
| <a name="use-internal-orc-library"></a>[`USE_INTERNAL_ORC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L11) | `ON` | Set to FALSE to use system ORC instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-parquet-library"></a>[`USE_INTERNAL_PARQUET_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L16) | `NOT_UNBUNDLED` | Set to FALSE to use system parquet library instead of bundled | |
| <a name="use-internal-poco-library"></a>[`USE_INTERNAL_POCO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/poco.cmake#L1) | `ON` | Use internal Poco library | |
| <a name="use-internal-protobuf-library"></a>[`USE_INTERNAL_PROTOBUF_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal protobuf library. You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. The external protobuf library can be installed in the system by running sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev |
| <a name="use-internal-rapidjson-library"></a>[`USE_INTERNAL_RAPIDJSON_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L9) | `NOT_UNBUNDLED` | Set to FALSE to use system rapidjson library instead of bundled | |
| <a name="use-internal-rdkafka-library"></a>[`USE_INTERNAL_RDKAFKA_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system librdkafka instead of the bundled | |
| <a name="use-internal-re-library"></a>[`USE_INTERNAL_RE2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/re2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system re2 library instead of bundled [slower] | |
| <a name="use-internal-rocksdb-library"></a>[`USE_INTERNAL_ROCKSDB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system ROCKSDB library instead of bundled | |
| <a name="use-internal-snappy-library"></a>[`USE_INTERNAL_SNAPPY_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system snappy library instead of bundled | |
| <a name="use-internal-sparsehash-library"></a>[`USE_INTERNAL_SPARSEHASH_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sparsehash.cmake#L1) | `ON` | Set to FALSE to use system sparsehash library instead of bundled | |
| <a name="use-internal-ssl-library"></a>[`USE_INTERNAL_SSL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L12) | `NOT_UNBUNDLED` | Set to FALSE to use system *ssl library instead of bundled | |
| <a name="use-internal-zlib-library"></a>[`USE_INTERNAL_ZLIB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zlib.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zlib library instead of bundled | |
| <a name="use-internal-zstd-library"></a>[`USE_INTERNAL_ZSTD_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zstd.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zstd library instead of bundled | |
### Other flags
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="add-gdb-index-for-gold"></a>[`ADD_GDB_INDEX_FOR_GOLD`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L195) | `OFF` | Add .gdb-index to resulting binaries for gold linker. | Ignored if `lld` is used |
| <a name="arch-native"></a>[`ARCH_NATIVE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L248) | `OFF` | Add -march=native compiler flag | |
| <a name="clickhouse-split-binary"></a>[`CLICKHOUSE_SPLIT_BINARY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L98) | `OFF` | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
| <a name="compiler-pipe"></a>[`COMPILER_PIPE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L235) | `ON` | -pipe compiler option | Less `/tmp` usage, more RAM usage. |
| <a name="enable-check-heavy-builds"></a>[`ENABLE_CHECK_HEAVY_BUILDS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L69) | `OFF` | Don't allow C++ translation units to compile too long or to take too much memory while compiling | |
| <a name="enable-fuzzing"></a>[`ENABLE_FUZZING`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L115) | `OFF` | Fuzzy testing using libfuzzer | Implies `WITH_COVERAGE` |
| <a name="enable-libraries"></a>[`ENABLE_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L357) | `ON` | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
| <a name="enable-multitarget-code"></a>[`ENABLE_MULTITARGET_CODE`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L100) | `ON` | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. `ifdef ENABLE_MULTITARGET`). If turned ON, this option defines such macro. See `src/Functions/TargetSpecific.h` |
| <a name="enable-tests"></a>[`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L154) | `ON` | Provide unit_test_dbms target with Google.Test unit tests | If turned `ON`, assumes the user has either the system GTest library or the bundled one. |
| <a name="enable-thinlto"></a>[`ENABLE_THINLTO`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L313) | `ON` | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
| <a name="fail-on-unsupported-options-combination"></a>[`FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32) | `ON` | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
| <a name="glibc-compatibility"></a>[`GLIBC_COMPATIBILITY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L159) | `ON` | Enable compatibility with older glibc libraries. | Only for Linux, x86_64. Implies `ENABLE_FASTMEMCPY` |
| <a name="linker-name"></a>[`LINKER_NAME`](https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L44) | `OFF` | Linker name or full path | Example values: `lld-10`, `gold`. |
| <a name="llvm-has-rtti"></a>[`LLVM_HAS_RTTI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L40) | `ON` | Enable if LLVM was build with RTTI enabled | |
| <a name="make-static-libraries"></a>[`MAKE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L91) | `USE_STATIC_LIBRARIES` | Disable to make shared libraries | |
| <a name="parallel-compile-jobs"></a>[`PARALLEL_COMPILE_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10) | `""` | Maximum number of concurrent compilation jobs | 1 if not set |
| <a name="parallel-link-jobs"></a>[`PARALLEL_LINK_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13) | `""` | Maximum number of concurrent link jobs | 1 if not set |
| <a name="sanitize"></a>[`SANITIZE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7) | `""` | Enable one of the code sanitizers | Possible values: - `address` (ASan) - `memory` (MSan) - `thread` (TSan) - `undefined` (UBSan) - "" (no sanitizing) |
| <a name="split-shared-libraries"></a>[`SPLIT_SHARED_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L96) | `OFF` | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
| <a name="strip-debug-symbols-functions"></a>[`STRIP_DEBUG_SYMBOLS_FUNCTIONS`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L49) | `STRIP_DSF_DEFAULT` | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables)." |
| <a name="unbundled"></a>[`UNBUNDLED`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L363) | `OFF` | Use system libraries instead of ones in contrib/ | We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your system. This mode exists for enthusiastic developers who are searching for trouble. Useful for maintainers of OS packages. |
| <a name="use-include-what-you-use"></a>[`USE_INCLUDE_WHAT_YOU_USE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L418) | `OFF` | Automatically reduce unneeded includes in source code (external tool) | https://github.com/include-what-you-use/include-what-you-use |
| <a name="use-libcxx"></a>[`USE_LIBCXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L1) | `NOT_UNBUNDLED` | Use libc++ and libc++abi instead of libstdc++ | |
| <a name="use-sentry"></a>[`USE_SENTRY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sentry.cmake#L13) | `ENABLE_LIBRARIES` | Use Sentry | |
| <a name="use-simdjson"></a>[`USE_SIMDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/simdjson.cmake#L1) | `ENABLE_LIBRARIES` | Use simdjson | |
| <a name="use-snappy"></a>[`USE_SNAPPY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L1) | `ENABLE_LIBRARIES` | Enable snappy library | |
| <a name="use-static-libraries"></a>[`USE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L90) | `ON` | Disable to use shared libraries | |
| <a name="use-unwind"></a>[`USE_UNWIND`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/unwind.cmake#L1) | `ENABLE_LIBRARIES` | Enable libunwind (better stacktraces) | |
| <a name="werror"></a>[`WERROR`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L373) | `OFF` | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
| <a name="weverything"></a>[`WEVERYTHING`](https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L22) | `ON` | Enable -Weverything option with some exceptions. | Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only |
| <a name="with-coverage"></a>[`WITH_COVERAGE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L274) | `OFF` | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
## Developer's guide for adding new CMake options
### Don't be obvious. Be informative.
Bad:
```cmake
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless as is neither gives the viewer any additional information nor explains the option purpose.
Better:
```cmake
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some
pre-conditions, leave a comment above the `option()` line and explain what it does.
The best way would be linking the docs page (if it exists).
The comment is parsed into a separate column (see below).
Even better:
```cmake
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong.
Also, such options should be disabled if applies.
Bad:
```cmake
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions.
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```cmake
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables)."
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment.
You may find that the option's name is self-descriptive.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```cmake
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON).
```
### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```cmake
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON).
```
Other example, bad:
```cmake
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```cmake
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`.
Prefer the `ON/OFF` values, if possible.

View File

@ -52,7 +52,7 @@ Engines in the family:
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3)
- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3)
### Special Engines {#special-engines}

View File

@ -3,23 +3,23 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# S3 Table Engine {#table-engine-s3}
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar
to the [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs) engine, but provides S3-specific features.
This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features.
## Usage {#usage}
## Create Table {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `compression`Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
**Example:**
@ -47,6 +47,12 @@ SELECT * FROM s3_engine_table LIMIT 2
│ two │ 2 │
└──────┴───────┘
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Implementation Details {#implementation-details}
@ -56,9 +62,9 @@ SELECT * FROM s3_engine_table LIMIT 2
- Indexes.
- Replication.
**Globs in path**
## Wildcards In Path {#wildcards-in-path}
Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment).
`path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
@ -67,80 +73,29 @@ Multiple path components can have globs. For being processed file should exist a
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
**Example**
1. Suppose we have several files in CSV format with the following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. There are several ways to make a table consisting of all six files:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Another way:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-related settings {#settings}
## S3-related Settings {#s3-settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size`Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size`Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects`Default value is `10`. Max number of HTTP redirects S3 hops allowed.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
### Endpoint-based settings {#endpointsettings}
## Endpoint-based Settings {#endpoint-settings}
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
- `endpoint`Mandatory. Specifies prefix of an endpoint.
- `access_key_id` and `secret_access_key`Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials`Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header`Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
- `server_side_encryption_customer_key_base64`Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `endpoint` — Specifies prefix of an endpoint. Mandatory.
- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
Example:
**Example:**
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -152,5 +107,50 @@ Example:
</endpoint-name>
</s3>
```
## Usage {#usage-examples}
Suppose we have several files in TSV format with the following URIs on HDFS:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. There are several ways to make a table consisting of all six files:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Another way:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Table consists of all the files in both directories (all files should satisfy format and schema described in query):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
4. Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
## See also
- [S3 table function](../../../sql-reference/table-functions/s3.md)
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -123,5 +123,6 @@ toc_title: Adopters
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
| <a href="https://www.tesla.com/" class="favicon">Tesla</a> | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse supports authenticating and managing users using external services.
The following external authenticators and directories are supported:
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
[Original article](https://clickhouse.tech/docs/en/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,115 @@
# Kerberos {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths. Those users may only use HTTP requests and must be able to authenticate using GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in ClickHouse config.
## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickhouse}
To enable Kerberos, one should include `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters:
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
- This parameter is optional, if omitted, the default principal will be used.
- `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
<!- ... -->
<kerberos />
</yandex>
```
With principal specification:
```xml
<yandex>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
With filtering by realm:
```xml
<yandex>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! warning "Note"
You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
`principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication.
## Kerberos as an external authenticator for existing users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be *kerberized* (via GSS-SPNEGO mechanism).
Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **The *primary* part of the canonical principal name of the initiator is expected to match the kerberized user name for authentication to succeed**.
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
In order to enable Kerberos authentication for the user, specify `kerberos` section instead of `password` or similar sections in the user definition.
Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
<!- ... -->
<users>
<!- ... -->
<my_user>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! warning "Warning"
Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown.
!!! info "Reminder"
Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -1883,6 +1883,53 @@ Possible values:
Default value: `0`.
## insert_shard_id {#insert_shard_id}
If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md#distributed) table into which the data will be inserted synchronously.
If `insert_shard_id` value is incorrect, the server will throw an exception.
To get the number of shards on `requested_cluster`, you can check server config or use this query:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Possible values:
- 0 — Disabled.
- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
Default value: `0`.
**Example**
Query:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Result:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine.

View File

@ -17,5 +17,3 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_limits) <!--hide-->

View File

@ -28,5 +28,3 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_usage) <!--hide-->

View File

@ -30,6 +30,4 @@ Columns:
## See Also {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas_usage) <!--hide-->
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,16 +4,66 @@ toc_priority: 141
# deltaSum {#agg_functions-deltasum}
Syntax: `deltaSum(value)`
Sums the arithmetic difference between consecutive rows. If the difference is negative, it is ignored.
Adds the differences between consecutive rows. If the difference is negative, it is ignored.
`value` must be some integer or floating point type.
**Syntax**
Example:
```sql
select deltaSum(arrayJoin([1, 2, 3])); -- => 2
select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7
select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
``` sql
deltaSum(value)
```
**Arguments**
- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type.
**Returned value**
- A gained arithmetic difference of the `Integer` or `Float` type.
**Examples**
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Result:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Query:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Result:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## See Also {#see-also}
- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference)

View File

@ -36,4 +36,4 @@ For the default user limit the maximum execution time with half a second in 30 m
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -18,7 +18,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.

View File

@ -62,7 +62,7 @@ Note that materialized view is influenced by [optimize_on_insert](../../../opera
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There isnt a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well.
## Live View (Experimental) {#live-view}

View File

@ -3,17 +3,19 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# S3 Table Function {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Arguments**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
@ -22,14 +24,14 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
A table with the specified structure for reading or writing data in the specified file.
**Example**
**Examples**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Selecting the first two rows from the table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv`:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
``` text
@ -44,7 +46,7 @@ The similar but from file with `gzip` compression:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
LIMIT 2;
```
``` text
@ -54,33 +56,20 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Usage {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Suppose that we have several files with following URIs on S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Count the amount of rows in files ending with numbers from 1 to 3:
``` sql
SELECT count(*)
@ -93,9 +82,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Count the total amount of rows in all files in these two directories:
``` sql
SELECT count(*)
@ -108,17 +95,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +111,22 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Insert data into file `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Insert data into file `test-data.csv.gz` from existing table:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
- [S3 engine](../../engines/table-engines/integrations/s3.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -3,144 +3,92 @@ toc_priority: 4
toc_title: S3
---
# S3 {#table_engines-s3}
# Движок таблиц S3 {#table-engine-s3}
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Этот движок похож на
движок [HDFS](../../../engines/table-engines/integrations/hdfs.md#table_engines-hdfs), но предоставляет S3-специфичные функции.
Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Он похож на движок [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности.
## Использование {#usage}
## Создание таблицы {#creating-a-table}
```sql
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Параметры**
**Параметры движка**
- `path` — URL ссылающийся на файл расположенный в S3. В режиме для чтения можно читать несколько файлов как один, поддерживаются следующие шаблоны для указания маски пути к файлам: *, ?, {abc,def} и {N..M} где N, M — числа, `abc, def — строки.
- `format` — [Формат](../../../interfaces/formats.md#formats) файла.
- `structure`Структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Алгоритм сжатия, не обязятельный параметр. Поддерживаемые значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. По умолчанию, алгоритм сжатия будет автоматически применен в зависимости от расширения в имени файла.
- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [ниже](#wildcards-in-path).
- `format` — [формат](../../../interfaces/formats.md#formats) файла.
- `structure`структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
**Пример:**
**Пример**
**1.** Создание таблицы `s3_engine_table` :
```sql
CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
SELECT * FROM s3_engine_table LIMIT 2;
```
**2.** Заполнение файла:
```sql
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Запрос данных:
```sql
SELECT * FROM s3_engine_table LIMIT 2
```
```text
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Виртуальные столбцы {#virtual-columns}
- `_path` — путь к файлу.
- `_file` — имя файла.
Подробнее про виртуальные столбцы можно прочитать [здесь](../../../engines/table-engines/index.md#table_engines-virtual_columns).
## Детали реализации {#implementation-details}
- Чтение и запись могут быть одновременными и паралельными
- Не поддерживается:
- `ALTER` и `SELECT...SAMPLE` операции.
- Индексы.
- Репликация.
- Чтение и запись могут быть параллельными.
- Не поддерживаются:
- запросы `ALTER` и `SELECT...SAMPLE`,
- индексы,
- репликация.
**Поддержка шаблонов в параметре path**
## Символы подстановки {#wildcards-in-path}
Множество частей параметра `path` поддерживает шаблоны. Для того чтобы быть обработанным файл должен присутствовать в S3 и соответсвовать шаблону. Списки файлов определяются в момент `SELECT` (но не в момент `CREATE`).
Аргумент `path` может указывать на несколько файлов, используя подстановочные знаки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
- `*`Заменяет любой количество любых символов кроме `/` включая пустые строки.
- `?`Заменяет один символ.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}`Заменяет любое числов в диапозоне от N до M включительно. N и M могут иметь лидирующие нули например `000..078`.
- `*`заменяет любое количество любых символов, кроме `/`, включая пустую строку.
- `?`заменяет любые одиночные символы.
- `{some_string, another_string, yet_another_one}` — заменяет любые строки `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}`заменяет любое число от N до M, включая обе границы. N и M могут иметь ведущие нули, например `000..078`.
Конструкции с`{}` работают также как в табличной функции [remote](../../../sql-reference/table-functions/remote.md).
Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md).
## Настройки движка S3 {#s3-settings}
Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки:
- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64 Mб`.
- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512 Mб`.
- `s3_max_redirects` — максимальное количество разрешенных переадресаций S3. Значение по умолчанию — `10`.
Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.
## Настройки конечных точек {#endpoint-settings}
Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
Обязательная настройка:
- `endpoint` — указывает префикс конечной точки.
Необязательные настройки:
- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой.
- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`.
- `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз.
- `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C.
**Пример**
1. Предположим у нас есть некоторые файлы в CSV формате со следующими URIs в S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
2. Есть несколько способов сделать таблицу состяющую из всех шести файлов:
<!-- -->
```sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV')
```
3. Другой способ:
```sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV')
```
4. Таблица состоящая из всех файлах в обоих каталогах (все файлы должны удовлетворять формату и схеме описанными в запросе):
```sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV')
```
!!! warning "Предупреждение"
Если список файлов содержит диапозоны номеров с ведующими нулями, используйте конструкции со скобками для каждой цифры или используйте `?`.
**Пример**
Создание таблицы с именами файлов `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
```sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV')
```
## Виртуальные колонки {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**Смотри также**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-специфичные настройки {#settings}
Следующие настройки могут быть заданы при запуске запроса или установлены в конфигурационном файле для пользовательского профиля.
- `s3_max_single_part_upload_size` — По умолчанию `64Mb`. Максикальный размер куска данных для загрузки в S3 как singlepart.
- `s3_min_upload_part_size` — По умолчанию `512Mb`. Минимальный размер куска данных для загрузки в S3 с помощью [S3 Multipart загрузки](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Значение по умолчанию `10`. Максимально допустимое количество HTTP перенаправлений от серверов S3.
Примечания для безопасности: если злоумышленник может указать произвольные ссылки на S3, то лучше выставить `s3_max_redirects` как ноль для избежания атак типа [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) ; или ограничить с помощью `remote_host_filter` список адресов по которым возможно взаимодействие с S3.
### Настройки специфичные для заданной конечной точки {#endpointsettings}
Следующие настройки могут быть указаны в конфигурационном файле для заданной конечной точки (которой будет сопоставлен точный конечный префик URL):
- `endpoint` — Обязательный параметр. Указывает префикс URL для конечной точки.
- `access_key_id` и `secret_access_key`Не обязательно. Задает параметры авторизации для заданной конечной точки.
- `use_environment_credentials`Не обязательный параметр, значение по умолчанию `false`. Если установлено как `true`, S3 клиент будет пытаться получить параметры авторизации из переменных окружения и Amazon EC2 метаданных для заданной конечной точки.
- `header`Не обязательный параметр, может быть указан несколько раз. Добавляет указанный HTTP заголовок к запросу для заданной в `endpoint` URL префикса.
- `server_side_encryption_customer_key_base64`Не обязательный параметр. Если указан, к запросам будут указаны заголовки необходимые для доступа к S3 объектам с SSE-C шифрованием.
Пример:
```
``` xml
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
@ -153,3 +101,50 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
</s3>
```
## Примеры использования {#usage-examples}
Предположим, у нас есть несколько файлов в формате TSV со следующими URL-адресами в HDFS:
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
1. Существует несколько способов создать таблицу, включающую в себя все шесть файлов:
``` sql
CREATE TABLE table_with_range (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
```
2. Другой способ:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
```
3. Таблица содержит все файлы в обоих каталогах (все файлы должны соответствовать формату и схеме, описанным в запросе):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
```
!!! warning "Warning"
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
**Смотрите также**
- [Табличная функция S3](../../../sql-reference/table-functions/s3.md)
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) <!--hide-->

View File

@ -11,5 +11,6 @@ ClickHouse поддерживает аутентификацию и управл
Поддерживаются следующие внешние аутентификаторы и каталоги:
- [LDAP](./ldap.md#external-authenticators-ldap) [аутентификатор](./ldap.md#ldap-external-authenticator) и [каталог](./ldap.md#ldap-external-user-directory)
- Kerberos [аутентификатор](./kerberos.md#external-authenticators-kerberos)
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/external-authenticators/index/) <!--hide-->

View File

@ -0,0 +1,118 @@
# Kerberos {#external-authenticators-kerberos}
ClickHouse предоставляет возможность аутентификации существующих (и правильно сконфигурированных) пользователей с использованием Kerberos.
В настоящее время возможно использование Kerberos только как внешнего аутентификатора, то есть для аутентификации уже существующих пользователей с помощью Kerberos. Пользователи, настроенные для Kerberos-аутентификации, могут работать с ClickHouse только через HTTP-интерфейс, причём сами клиенты должны иметь возможность аутентификации с использованием механизма GSS-SPNEGO.
!!! info "!!!"
Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
## Настройка Kerberos в ClickHouse {#enabling-kerberos-in-clickhouse}
Для того, чтобы задействовать Kerberos-аутентификацию в ClickHouse, в первую очередь необходимо добавить одну-единственную секцию `kerberos` в `config.xml`.
В секции могут быть указаны дополнительные параметры:
- `principal` &mdash; задаёт имя принципала (canonical service principal name, SPN), используемое при авторизации ClickHouse на Kerberos-сервере.
- Это опциональный параметр, при его отсутствии будет использовано стандартное имя.
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации.
- Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет.
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<!- ... -->
<kerberos />
</yandex>
```
Или, с указанием принципала:
```xml
<yandex>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! Warning "Важно"
В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей.
!!! Warning "Важно"
В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей.
## Аутентификация пользователей с помощью Kerberos {#kerberos-as-an-external-authenticator-for-existing-users}
Уже существующие пользователи могут воспользоваться аутентификацией с помощью Kerberos. Однако, Kerberos-аутентификация возможна только при использовании HTTP-интерфейса.
Имя принципала (principal name) обычно имеет вид:
- *primary/instance@REALM*
Для успешной аутентификации необходимо, чтобы *primary* совпало с именем пользователя ClickHouse, настроенного для использования Kerberos.
### Настройка Kerberos в `users.xml` {#enabling-kerberos-in-users-xml}
Для того, чтобы пользователь имел возможность производить аутентификацию с помощью Kerberos, достаточно включить секцию `kerberos` в описание пользователя в `users.xml` (например, вместо секции `password` или аналогичной ей).
В секции могут быть указаны дополнительные параметры:
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm): аутентификация будет возможна только при совпадении реалм клиента с указанным.
- Этот параметр является опциональным, при его отсутствии фильтрация применяться не будет.
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<!- ... -->
<users>
<!- ... -->
<my_user>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! Warning "Важно"
Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
!!! info ""
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
### Настройка Kerberos через SQL {#enabling-kerberos-using-sql}
Пользователей, использующих Kerberos-аутентификацию, можно создать не только с помощью изменения конфигурационных файлов.
Если SQL-ориентированное управление доступом включено в ClickHouse, можно также создать пользователя, работающего через Kerberos, с помощью SQL.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
Или, без фильтрации по реалм:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -29,6 +29,8 @@ toc_title: "Квоты"
<!-- Без ограничений. Просто считать соответствующие данные за указанный интервал. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
@ -48,6 +50,8 @@ toc_title: "Квоты"
<duration>3600</duration>
<queries>1000</queries>
<query_selects>100</query_selects>
<query_inserts>100</query_inserts>
<errors>100</errors>
<result_rows>1000000000</result_rows>
<read_rows>100000000000</read_rows>
@ -58,6 +62,8 @@ toc_title: "Квоты"
<duration>86400</duration>
<queries>10000</queries>
<query_selects>10000</query_selects>
<query_inserts>10000</query_inserts>
<errors>1000</errors>
<result_rows>5000000000</result_rows>
<read_rows>500000000000</read_rows>
@ -74,6 +80,10 @@ toc_title: "Квоты"
`queries` - общее количество запросов;
`query_selects` общее количество запросов `SELECT`.
`query_inserts` общее количество запросов `INSERT`.
`errors` - количество запросов, при выполнении которых было выкинуто исключение;
`result_rows` - суммарное количество строк, отданных в виде результата;

View File

@ -1759,6 +1759,54 @@ ClickHouse генерирует исключение
- [Движок Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
## insert_shard_id {#insert_shard_id}
Если не `0`, указывает, в какой шард [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы данные будут вставлены синхронно.
Если значение настройки `insert_shard_id` указано неверно, сервер выдаст ошибку.
Узнать количество шардов `shard_num` на кластере `requested_cluster` можно из конфигурации сервера, либо используя запрос:
``` sql
SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
```
Возможные значения:
- 0 — выключено.
- Любое число от `1` до `shards_num` соответствующей [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы.
Значение по умолчанию: `0`.
**Пример**
Запрос:
```sql
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
SELECT * FROM x_dist ORDER BY number ASC;
```
Результат:
``` text
┌─number─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
│ 3 │
│ 3 │
│ 4 │
│ 4 │
└────────┘
```
## validate_polygons {#validate_polygons}
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.

View File

@ -4,16 +4,17 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — Интервал рандомизирован.
- `1` — Интервал не рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса, в секундах.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
- `0` — интервал рандомизирован.
- `1` — интервал не рандомизирован.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `SELECT`.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов `INSERT`.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество ошибок.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байтов, считываемых из всех таблиц и табличных функций, участвующих в запросе.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса, в секундах.

View File

@ -4,27 +4,28 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key`([String](../../sql-reference/data-types/string.md)) — Значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время начала расчета потребления ресурсов.
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Время окончания расчета потребления ресурс
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key`([String](../../sql-reference/data-types/string.md)) — значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время начала расчета потребления ресурсов.
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — время окончания расчета потребления ресурс
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк результата.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк результата.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -4,28 +4,31 @@
Столбцы:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — Ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число запросов.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of source rows read from tables.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное время выполнения запроса.
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — имя квоты.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — ключ квоты.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — квота используется для текущего пользователя.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — время начала расчета потребления ресурсов.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — время окончания расчета потребления ресурсов.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — длина временного интервала для расчета потребления ресурсов, в секундах.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов на этом интервале.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число запросов.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `SELECT` на этом интервале.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `SELECT` на этом интервале.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество запросов `INSERT` на этом интервале.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество запросов `INSERT` на этом интервале.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — число запросов, вызвавших ошибки.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное число ошибок.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк, приведенных в результате.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество исходных строк, считываемых из таблиц.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — объем оперативной памяти в байтах, используемый для хранения результата запроса.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальный объем оперативной памяти, используемый для хранения результата запроса, в байтах.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее число исходных строк, считываемых из таблиц для выполнения запроса на всех удаленных серверах.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -0,0 +1,69 @@
---
toc_priority: 141
---
# deltaSum {#agg_functions-deltasum}
Суммирует арифметическую разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована.
**Синтаксис**
``` sql
deltaSum(value)
```
**Аргументы**
- `value` — входные значения, должны быть типа [Integer](../../data-types/int-uint.md) или [Float](../../data-types/float.md).
**Возвращаемое значение**
- накопленная арифметическая разница, типа `Integer` или `Float`.
**Примеры**
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3]))─┐
│ 2 │
└────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]))─┐
│ 7 │
└───────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT deltaSum(arrayJoin([2.25, 3, 4.5]));
```
Результат:
``` text
┌─deltaSum(arrayJoin([2.25, 3, 4.5]))─┐
│ 2.25 │
└─────────────────────────────────────┘
```
## Смотрите также {#see-also}
- [runningDifference](../../functions/other-functions.md#runningdifferencex)

View File

@ -14,14 +14,14 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -37,5 +37,4 @@ ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
```

View File

@ -13,13 +13,13 @@ toc_title: "Квота"
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
@ -35,6 +35,4 @@ CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```
<!--hide-->
```

View File

@ -60,5 +60,5 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.
Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.
Чтобы удалить представление, следует использовать [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Впрочем, `DROP TABLE` тоже работает для представлений.

View File

@ -23,15 +23,15 @@ toc_title: "Введение"
| Функция | Описание |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](../../sql-reference/table-functions/merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](../../sql-reference/table-functions/numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](../../sql-reference/table-functions/remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](../../sql-reference/table-functions/url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](../../sql-reference/table-functions/mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Создаёт таблицу с движком [PostgreSQL](../../engines/table-engines/integrations/postgresql.md). |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](../../sql-reference/table-functions/odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](../../sql-reference/table-functions/s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
| [file](file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [jdbc](jdbc.md) | Создаёт таблицу с дижком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). |
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/) <!--hide-->

View File

@ -3,35 +3,41 @@ toc_priority: 45
toc_title: s3
---
# s3 {#s3}
# Табличная Функция S3 {#s3-table-function}
Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md).
Предоставляет табличный интерфейс для выбора/вставки файлов в [Amazon S3](https://aws.amazon.com/s3/). Эта табличная функция похожа на [hdfs](../../sql-reference/table-functions/hdfs.md), но обеспечивает специфические для S3 возможности.
**Синтаксис**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
```
**Input parameters**
**Aргументы**
- `path`Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `abc, def — strings.
- `format`The [format](../../interfaces/formats.md#formats) of the file.
- `structure`Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
- `path`URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. Подробнее смотри [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format`[формат](../../interfaces/formats.md#formats) файла.
- `structure`руктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression`автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр.
**Returned value**
**Возвращаемые значения**
A table with the specified structure for reading or writing data in the specified file.
Таблица с указанной структурой для чтения или записи данных в указанный файл.
**Example**
**Примеры**
Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it:
Создание таблицы из файла S3 `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` и выбор первых трех столбцов из нее:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -39,14 +45,18 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
The similar but from file with `gzip` compression:
То же самое, но файл со сжатием `gzip`:
Запрос:
``` sql
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
LIMIT 2;
```
Результат:
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
@ -54,37 +64,24 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
**Globs in path**
## Примеры использования {#usage-examples}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Предположим, у нас есть несколько файлов со следующими URI на S3:
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv'
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
**Example**
1. Suppose that we have several files with following URIs on S3:
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv
- https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv
2. Query the amount of rows in files end with number from 1 to 3:
<!-- -->
Подсчитаем количество строк в файлах, заканчивающихся цифрами от 1 до 3:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -93,13 +90,11 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
3. Query the amount of rows in all files of these two directories:
<!-- -->
Подсчитаем общее количество строк во всех файлах этих двух каталогов:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32');
```
``` text
@ -108,17 +103,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi
└─────────┘
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
**Example**
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32')
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
```
``` text
@ -127,42 +119,23 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000
└─────────┘
```
**Data insert**
The S3 table function may be used for data insert as well.
**Example**
Insert a data into file `test-data.csv.gz`:
Запишем данные в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
VALUES ('test-data', 1), ('test-data-2', 2)
VALUES ('test-data', 1), ('test-data-2', 2);
```
Insert a data into file `test-data.csv.gz` from existing table:
Запишем данные из существующей таблицы в файл `test-data.csv.gz`:
``` sql
INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
SELECT name, value FROM existing_table
SELECT name, value FROM existing_table;
```
## Virtual Columns {#virtual-columns}
**Смотрите также**
- `_path` — Path to the file.
- `_file` — Name of the file.
- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md)
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3.
- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html).
- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/s3/) <!--hide-->

View File

@ -51,5 +51,5 @@ The easiest way to see the result is to use `--livereload=8888` argument of buil
At the moment theres no easy way to do just that, but you can consider:
- To hit the “Watch” button on top of GitHub web interface to know as early as possible, even during pull request. Alternative to this is `#github-activity` channel of [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/enQtOTUzMjM4ODQwNTc5LWJmMjE3Yjc2YmI1ZDBlZmI4ZTc3OWY3ZTIwYTljYzY4MzBlODM3YzBjZTc1YmYyODRlZTJkYTgzYzBiNTA2Yjk).
- To hit the “Watch” button on top of GitHub web interface to know as early as possible, even during pull request. Alternative to this is `#github-activity` channel of [public ClickHouse Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA).
- Some search engines allow to subscribe on specific website changes via email and you can opt-in for that for https://clickhouse.tech.

View File

@ -1,5 +1,5 @@
---
toc_folder_title: Interfaces
toc_folder_title: 接口
toc_priority: 14
toc_title: 客户端
---

View File

@ -17,7 +17,7 @@ toc_title: ClickHouse的特性
在一些列式数据库管理系统中(例如InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}

View File

@ -1,7 +1,5 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5BFC\u8A00"
toc_folder_title: 引言
toc_priority: 1
---

View File

@ -1,7 +1,7 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u53D1\u8A00"
toc_folder_title: "\u8BED\u53E5"
toc_priority: 31
---

View File

@ -1,4 +1,5 @@
---
toc_folder_title: 表函数
toc_priority: 34
toc_title: "\u5BFC\u8A00"
---

View File

@ -188,6 +188,7 @@ add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
add_subdirectory (git-import)
add_subdirectory (bash-completion)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)

View File

@ -0,0 +1 @@
add_subdirectory(completions)

View File

@ -0,0 +1,28 @@
macro(configure_bash_completion)
set(out "/usr/share/bash-completion/completions")
find_program(pkg-config PKG_CONFIG_BIN)
if (PKG_CONFIG_BIN)
execute_process(
COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion
OUTPUT_VARIABLE ${out}
OUTPUT_STRIP_TRAILING_WHITESPACE
)
endif()
string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}")
message(STATUS "bash_completion will be written to ${out}")
endmacro()
configure_bash_completion()
foreach (name
# set of functions
clickhouse-bootstrap
# binaries that accept settings as command line argument
clickhouse-client
clickhouse-local
clickhouse-benchmark
clickhouse
)
install(FILES ${name} DESTINATION ${out})
endforeach()

View File

@ -0,0 +1,43 @@
[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
function _clickhouse_get_utils()
{
local cmd=$1 && shift
"$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }'
}
function _complete_for_clickhouse_entrypoint_bin()
{
local cur prev cword words
eval local cmd="$( _clickhouse_quote "$1" )"
_clickhouse_bin_exist "$cmd" || return 0
COMPREPLY=()
_get_comp_words_by_ref cur prev cword words
local util="$cur"
# complete utils, until it will be finished
if [[ $cword -lt 2 ]]; then
COMPREPLY=( $(compgen -W "$(_clickhouse_get_utils "$cmd")" -- "$cur") )
return
fi
util="${words[1]}"
case "$prev" in
-C|--config-file|--config)
return
;;
# Argh... This looks like a bash bug...
# Redirections are passed to the completion function
# although it is managed by the shell directly...
'<'|'>'|'>>'|[12]'>'|[12]'>>')
return
;;
esac
COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd" "$util")" -- "$cur") )
return 0
}
_complete_clickhouse_generic clickhouse _complete_for_clickhouse_entrypoint_bin

View File

@ -0,0 +1,2 @@
[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-benchmark

View File

@ -0,0 +1,81 @@
#
# bash autocomplete, that can work with:
# a) --help of program
#
# Also you may like:
# $ bind "set completion-ignore-case on"
# $ bind "set show-all-if-ambiguous on"
#
# It uses bash-completion dynamic loader.
# Known to work with bash 3.* with programmable completion and extended
# pattern matching enabled (use 'shopt -s extglob progcomp' to enable
# these if they are not already enabled).
shopt -s extglob
export _CLICKHOUSE_COMPLETION_LOADED=1
function _clickhouse_bin_exist()
{ [ -x "$1" ] || command -v "$1" >& /dev/null; }
function _clickhouse_quote()
{
local quoted=${1//\'/\'\\\'\'};
printf "'%s'" "$quoted"
}
# Extract every option (everything that starts with "-") from the --help dialog.
function _clickhouse_get_options()
{
"$@" --help 2>&1 | awk -F '[ ,=<>]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 0, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
}
function _complete_for_clickhouse_generic_bin()
{
local cur prev
eval local cmd="$( _clickhouse_quote "$1" )"
_clickhouse_bin_exist "$cmd" || return 0
COMPREPLY=()
_get_comp_words_by_ref cur prev
case "$prev" in
-C|--config-file|--config)
return
;;
# Argh... This looks like a bash bug...
# Redirections are passed to the completion function
# although it is managed by the shell directly...
'<'|'>'|'>>'|[12]'>'|[12]'>>')
return
;;
esac
COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd")" -- "$cur") )
return 0
}
function _complete_clickhouse_generic()
{
local bin=$1 && shift
local f=${1:-_complete_for_clickhouse_generic_bin}
local o=(
-o default
-o bashdefault
-o nospace
-F "$f"
"$bin"
)
complete "${o[@]}"
}
function _complete_clickhouse_bootstrap_main()
{
local runtime=/usr/share/bash-completion/bash_completion
if ! type _get_comp_words_by_ref >& /dev/null && [[ -f $runtime ]]; then
source $runtime
fi
type _get_comp_words_by_ref >& /dev/null || return 0
}
_complete_clickhouse_bootstrap_main "$@"

View File

@ -0,0 +1,2 @@
[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-client

View File

@ -0,0 +1,2 @@
[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
_complete_clickhouse_generic clickhouse-local

View File

@ -63,7 +63,7 @@ enum class AccessType
M(ALTER_SETTINGS, "ALTER SETTING, ALTER MODIFY SETTING, MODIFY SETTING", TABLE, ALTER_TABLE) /* allows to execute ALTER MODIFY SETTING */\
M(ALTER_MOVE_PARTITION, "ALTER MOVE PART, MOVE PARTITION, MOVE PART", TABLE, ALTER_TABLE) \
M(ALTER_FETCH_PARTITION, "FETCH PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION", TABLE, ALTER_TABLE) \
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \
\
M(ALTER_TABLE, "", GROUP, ALTER) \
\

View File

@ -413,7 +413,7 @@ public:
for (const Field & f : keys_to_keep_)
{
keys_to_keep.emplace(f.safeGet<NearestFieldType<T>>());
keys_to_keep.emplace(f.safeGet<T>());
}
}

View File

@ -255,7 +255,7 @@ public:
/// The constant value. It is valid even if the size of the column is 0.
template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
T getValue() const { return getField().safeGet<T>(); }
bool isCollationSupported() const override { return data->isCollationSupported(); }
};

View File

@ -107,7 +107,7 @@ public:
{
data.resize_fill(data.size() + length);
}
void insert(const Field & x) override { data.push_back(DB::get<NearestFieldType<T>>(x)); }
void insert(const Field & x) override { data.push_back(DB::get<T>(x)); }
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t n) override

View File

@ -261,7 +261,7 @@ public:
void insert(const Field & x) override
{
data.push_back(DB::get<NearestFieldType<T>>(x));
data.push_back(DB::get<T>(x));
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -399,10 +399,10 @@ public:
template <typename T>
T & get();
NearestFieldType<std::decay_t<T>> & get();
template <typename T>
const T & get() const
const auto & get() const
{
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->get<T>();
@ -436,21 +436,10 @@ public:
return true;
}
template <typename T> T & safeGet()
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet() const
{ return const_cast<Field *>(this)->safeGet<T>(); }
template <typename T> const T & safeGet() const
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T> auto & safeGet();
bool operator< (const Field & rhs) const
{
@ -778,22 +767,40 @@ inline constexpr bool isInt64FieldType(Field::Types::Which t)
// Field value getter with type checking in debug builds.
template <typename T>
T & Field::get()
NearestFieldType<std::decay_t<T>> & Field::get()
{
using ValueType = std::decay_t<T>;
// Before storing the value in the Field, we static_cast it to the field
// storage type, so here we return the value of storage type as well.
// Otherwise, it is easy to make a mistake of reinterpret_casting the stored
// value to a different and incompatible type.
// For example, a Float32 value is stored as Float64, and it is incorrect to
// return a reference to this value as Float32.
using StoredType = NearestFieldType<std::decay_t<T>>;
#ifndef NDEBUG
// Disregard signedness when converting between int64 types.
constexpr Field::Types::Which target = TypeToEnum<NearestFieldType<ValueType>>::value;
constexpr Field::Types::Which target = TypeToEnum<StoredType>::value;
if (target != which
&& (!isInt64FieldType(target) || !isInt64FieldType(which)))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", Types::toString(which), Types::toString(target));
#endif
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
StoredType * MAY_ALIAS ptr = reinterpret_cast<StoredType *>(&storage);
return *ptr;
}
template <typename T>
auto & Field::safeGet()
{
const Types::Which requested = TypeToEnum<NearestFieldType<std::decay_t<T>>>::value;
if (which != requested)
throw Exception("Bad get: has " + std::string(getTypeName()) + ", requested " + std::string(Types::toString(requested)), ErrorCodes::BAD_GET);
return get<T>();
}
template <typename T>
T & Field::reinterpret()
{

View File

@ -105,7 +105,7 @@ DataTypeEnum<Type>::DataTypeEnum(const Values & values_) : values{values_}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const FieldType x = get<NearestFieldType<FieldType>>(field);
const FieldType x = get<FieldType>(field);
writeBinary(x, ostr);
}
@ -405,7 +405,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
const String & field_name = name_literal->value.get<String>();
const auto value = value_literal->value.get<NearestFieldType<FieldType>>();
const auto value = value_literal->value.get<FieldType>();
if (value > std::numeric_limits<FieldType>::max() || value < std::numeric_limits<FieldType>::min())
throw Exception{"Value " + toString(value) + " for element '" + field_name + "' exceeds range of " + EnumName<FieldType>::value,

View File

@ -152,7 +152,7 @@ template <typename T>
void DataTypeNumberBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
/// ColumnVector<T>::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64
typename ColumnVector<T>::ValueType x = get<NearestFieldType<FieldType>>(field);
typename ColumnVector<T>::ValueType x = get<FieldType>(field);
writeBinary(x, ostr);
}

View File

@ -338,7 +338,7 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated()
template <typename T>
void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -450,7 +450,7 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const
}
}
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -370,7 +370,7 @@ void FlatDictionary::calculateBytesAllocated()
template <typename T>
void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
const auto & null_value_ref = std::get<T>(attribute.null_values);
attribute.arrays.emplace<ContainerType<T>>(initial_array_size, null_value_ref);
}
@ -478,7 +478,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
}
}
setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -451,7 +451,7 @@ void HashedDictionary::calculateBytesAllocated()
template <typename T>
void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
if (!sparse)
attribute.maps = std::make_unique<CollectionType<T>>();
else
@ -565,7 +565,7 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
}
}
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
};
callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -595,7 +595,7 @@ void IPAddressDictionary::calculateBytesAllocated()
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<NearestFieldType<T>>());
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<T>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -786,7 +786,7 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field &
}
else
{
setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
setAttributeValueImpl<AttributeType>(attribute, value.get<AttributeType>());
}
};

View File

@ -350,7 +350,7 @@ void RangeHashedDictionary::calculateBytesAllocated()
template <typename T>
void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = T(null_value.get<T>());
attribute.maps = std::make_unique<Collection<T>>();
}
@ -458,7 +458,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
value_to_insert = Value<ValueType>{ range, { value.get<ValueType>() }};
}
}

View File

@ -872,15 +872,18 @@ bool FunctionArrayElement::matchKeyToIndexNumberConst(
if (!data_numeric)
return false;
bool is_integer_field = Field::dispatch([](const auto & value)
std::optional<DataType> index_as_integer;
Field::dispatch([&](const auto & value)
{
return is_integer_v<std::decay_t<decltype(value)>>;
using FieldType = std::decay_t<decltype(value)>;
if constexpr (is_integer_v<FieldType> && std::is_convertible_v<FieldType, DataType>)
index_as_integer = static_cast<DataType>(value);
}, index);
if (!is_integer_field)
if (!index_as_integer)
return false;
MatcherNumberConst<DataType> matcher{data_numeric->getData(), get<DataType>(index)};
MatcherNumberConst<DataType> matcher{data_numeric->getData(), *index_as_integer};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
}

View File

@ -159,7 +159,8 @@ void WriteBufferFromS3::writePart()
auto outcome = client_ptr->UploadPart(req);
LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, temporary_buffer->tellp());
LOG_TRACE(
log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Data size: {}", bucket, key, multipart_upload_id, req.GetContentLength());
if (outcome.IsSuccess())
{
@ -215,7 +216,7 @@ void WriteBufferFromS3::makeSinglepartUpload()
auto outcome = client_ptr->PutObject(req);
if (outcome.IsSuccess())
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}", bucket, key);
LOG_DEBUG(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}", bucket, key, req.GetContentLength());
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}

View File

@ -41,8 +41,8 @@ String makeStringsEnum(const std::set<String> & values)
void changeIfArguments(ASTPtr & first, ASTPtr & second)
{
String first_value = first->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String second_value = second->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String first_value = first->as<ASTLiteral>()->value.get<String>();
String second_value = second->as<ASTLiteral>()->value.get<String>();
std::set<String> values;
values.insert(first_value);
@ -67,9 +67,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other)
{
std::set<String> values;
for (const auto & item : array_to->as<ASTLiteral>()->value.get<NearestFieldType<Array>>())
values.insert(item.get<NearestFieldType<String>>());
values.insert(other->as<ASTLiteral>()->value.get<NearestFieldType<String>>());
for (const auto & item : array_to->as<ASTLiteral>()->value.get<Array>())
values.insert(item.get<String>());
values.insert(other->as<ASTLiteral>()->value.get<String>());
String enum_string = makeStringsEnum(values);
@ -197,7 +197,7 @@ struct ConvertStringsToEnumMatcher
String(literal_other->value.getTypeName()) != "String")
return;
Array array_to = literal_to->value.get<NearestFieldType<Array>>();
Array array_to = literal_to->value.get<Array>();
if (array_to.size() == 0)
return;

View File

@ -823,7 +823,9 @@ private:
if (info->loading_id < min_id)
startLoading(*info, forced_to_reload, *min_id);
return false; /// wait for the next event
/// Wait for the next event if loading wasn't completed, and stop otherwise.
return (info->state_id >= min_id);
};
if (timeout == WAIT)
@ -851,9 +853,10 @@ private:
if (info.state_id >= min_id)
continue;
all_ready = false;
if (info.loading_id < min_id)
startLoading(info, forced_to_reload, *min_id);
all_ready &= (info.state_id >= min_id);
}
return all_ready;
};

View File

@ -299,7 +299,9 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
break;
}
case ASTAlterCommand::FREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::FREEZE_ALL:
case ASTAlterCommand::FREEZE_ALL: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_PARTITION: [[fallthrough]];
case ASTAlterCommand::UNFREEZE_ALL:
{
required_access.emplace_back(AccessType::ALTER_FREEZE_PARTITION, database, table);
break;

View File

@ -271,6 +271,27 @@ void ASTAlterCommand::formatImpl(
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_PARTITION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
if (!with_name.empty())
{
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::UNFREEZE_ALL)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UNFREEZE";
if (!with_name.empty())
{
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "")
<< " " << DB::quote << with_name;
}
}
else if (type == ASTAlterCommand::DELETE)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : "");
@ -368,7 +389,8 @@ bool ASTAlterQuery::isSettingsAlter() const
bool ASTAlterQuery::isFreezeAlter() const
{
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL);
return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL)
|| isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::UNFREEZE_ALL);
}
/** Get the text that identifies this element. */

View File

@ -54,6 +54,8 @@ public:
FETCH_PARTITION,
FREEZE_PARTITION,
FREEZE_ALL,
UNFREEZE_PARTITION,
UNFREEZE_ALL,
DELETE,
UPDATE,
@ -153,7 +155,9 @@ public:
*/
String from;
/** For FREEZE PARTITION - place local backup to directory with specified name.
/**
* For FREEZE PARTITION - place local backup to directory with specified name.
* For UNFREEZE - delete local backup at directory with specified name.
*/
String with_name;

View File

@ -63,6 +63,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_fetch_partition("FETCH PARTITION");
ParserKeyword s_replace_partition("REPLACE PARTITION");
ParserKeyword s_freeze("FREEZE");
ParserKeyword s_unfreeze("UNFREEZE");
ParserKeyword s_partition("PARTITION");
ParserKeyword s_first("FIRST");
@ -454,6 +455,37 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
}
else if (s_unfreeze.ignore(pos, expected))
{
if (s_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
command->type = ASTAlterCommand::UNFREEZE_PARTITION;
}
else
{
command->type = ASTAlterCommand::UNFREEZE_ALL;
}
/// WITH NAME 'name' - remove local backup to directory with specified name
if (s_with.ignore(pos, expected))
{
if (!s_name.ignore(pos, expected))
return false;
ASTPtr ast_with_name;
if (!parser_string_literal.parse(pos, ast_with_name, expected))
return false;
command->with_name = ast_with_name->as<ASTLiteral &>().value.get<const String &>();
}
else
{
return false;
}
}
else if (s_modify_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))

View File

@ -500,7 +500,7 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa
else
{
/// There is more than one DateTime column in partition key and we don't know which one to choose.
minmax_idx_time_column_pos = -1;
minmax_idx_time_column_pos = -1;
}
}
}
@ -1849,11 +1849,6 @@ void MergeTreeData::changeSettings(
}
}
PartitionCommandsResultInfo MergeTreeData::freezeAll(const String & with_name, const StorageMetadataPtr & metadata_snapshot, const Context & context, TableLockHolder &)
{
return freezePartitionsByMatcher([] (const DataPartPtr &) { return true; }, metadata_snapshot, with_name, context);
}
void MergeTreeData::PartsTemporaryRename::addPart(const String & old_name, const String & new_name)
{
old_and_new_names.push_back({old_name, new_name});
@ -2690,44 +2685,6 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part
}
}
PartitionCommandsResultInfo MergeTreeData::freezePartition(const ASTPtr & partition_ast, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder &)
{
std::optional<String> prefix;
String partition_id;
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
/// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
if (const auto * partition_lit = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>())
prefix = partition_lit->value.getType() == Field::Types::UInt64
? toString(partition_lit->value.get<UInt64>())
: partition_lit->value.safeGet<String>();
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
}
else
partition_id = getPartitionIDFromQuery(partition_ast, context);
if (prefix)
LOG_DEBUG(log, "Freezing parts with prefix {}", *prefix);
else
LOG_DEBUG(log, "Freezing parts with partition ID {}", partition_id);
return freezePartitionsByMatcher(
[&prefix, &partition_id](const DataPartPtr & part)
{
if (prefix)
return startsWith(part->info.partition_id, *prefix);
else
return part->info.partition_id == partition_id;
},
metadata_snapshot,
with_name,
context);
}
void MergeTreeData::checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings) const
{
for (const auto & command : commands)
@ -2957,6 +2914,21 @@ Pipe MergeTreeData::alterPartition(
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezePartition(command.partition, command.with_name, query_context, lock);
}
break;
case PartitionCommand::UNFREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = unfreezeAll(command.with_name, query_context, lock);
}
break;
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
@ -3719,7 +3691,60 @@ MergeTreeData::PathsWithDisks MergeTreeData::getRelativeDataPathsWithDisks() con
return res;
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context)
MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & partition_ast, const Context & context) const
{
bool prefixed = false;
String id;
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
/// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
if (const auto * partition_lit = partition_ast->as<ASTPartition &>().value->as<ASTLiteral>())
{
id = partition_lit->value.getType() == Field::Types::UInt64
? toString(partition_lit->value.get<UInt64>())
: partition_lit->value.safeGet<String>();
prefixed = true;
}
else
id = getPartitionIDFromQuery(partition_ast, context);
}
else
id = getPartitionIDFromQuery(partition_ast, context);
return [prefixed, id](const String & partition_id)
{
if (prefixed)
return startsWith(partition_id, id);
else
return id == partition_id;
};
}
PartitionCommandsResultInfo MergeTreeData::freezePartition(
const ASTPtr & partition_ast,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context,
TableLockHolder &)
{
return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, context), metadata_snapshot, with_name, context);
}
PartitionCommandsResultInfo MergeTreeData::freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder &)
{
return freezePartitionsByMatcher([] (const String &) { return true; }, metadata_snapshot, with_name, context);
}
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
MatcherFn matcher,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context)
{
String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString();
String default_shadow_path = clickhouse_path + "shadow/";
@ -3742,7 +3767,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
size_t parts_processed = 0;
for (const auto & part : data_parts)
{
if (!matcher(part))
if (!matcher(part->info.partition_id))
continue;
LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
@ -3772,6 +3797,70 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
return result;
}
PartitionCommandsResultInfo MergeTreeData::unfreezePartition(
const ASTPtr & partition,
const String & backup_name,
const Context & context,
TableLockHolder &)
{
return unfreezePartitionsByMatcher(getPartitionMatcher(partition, context), backup_name, context);
}
PartitionCommandsResultInfo MergeTreeData::unfreezeAll(
const String & backup_name,
const Context & context,
TableLockHolder &)
{
return unfreezePartitionsByMatcher([] (const String &) { return true; }, backup_name, context);
}
PartitionCommandsResultInfo MergeTreeData::unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, const Context &)
{
auto backup_path = std::filesystem::path("shadow") / escapeForFileName(backup_name) / relative_data_path;
LOG_DEBUG(log, "Unfreezing parts by path {}", backup_path.generic_string());
PartitionCommandsResultInfo result;
for (const auto & disk : getStoragePolicy()->getDisks())
{
if (!disk->exists(backup_path))
continue;
for (auto it = disk->iterateDirectory(backup_path); it->isValid(); it->next())
{
const auto & partition_directory = it->name();
/// Partition ID is prefix of part directory name: <partition id>_<rest of part directory name>
auto found = partition_directory.find('_');
if (found == std::string::npos)
continue;
auto partition_id = partition_directory.substr(0, found);
if (!matcher(partition_id))
continue;
const auto & path = it->path();
disk->removeRecursive(path);
result.push_back(PartitionCommandResultInfo{
.partition_id = partition_id,
.part_name = partition_directory,
.backup_path = disk->getPath() + backup_path.generic_string(),
.part_backup_path = disk->getPath() + path,
.backup_name = backup_name,
});
LOG_DEBUG(log, "Unfreezed part by path {}", disk->getPath() + path);
}
}
LOG_DEBUG(log, "Unfreezed {} parts", result.size());
return result;
}
bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const
{
const auto settings = getSettings();

View File

@ -544,13 +544,6 @@ public:
const ASTPtr & new_settings,
TableLockHolder & table_lock_holder);
/// Freezes all parts.
PartitionCommandsResultInfo freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder & table_lock_holder);
/// Should be called if part data is suspected to be corrupted.
void reportBrokenPart(const String & name) const
{
@ -568,8 +561,32 @@ public:
* Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number,
* or if 'with_name' is specified - backup is created in directory with specified name.
*/
PartitionCommandsResultInfo freezePartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder & table_lock_holder);
PartitionCommandsResultInfo freezePartition(
const ASTPtr & partition,
const StorageMetadataPtr & metadata_snapshot,
const String & with_name,
const Context & context,
TableLockHolder & table_lock_holder);
/// Freezes all parts.
PartitionCommandsResultInfo freezeAll(
const String & with_name,
const StorageMetadataPtr & metadata_snapshot,
const Context & context,
TableLockHolder & table_lock_holder);
/// Unfreezes particular partition.
PartitionCommandsResultInfo unfreezePartition(
const ASTPtr & partition,
const String & backup_name,
const Context & context,
TableLockHolder & table_lock_holder);
/// Unfreezes all parts.
PartitionCommandsResultInfo unfreezeAll(
const String & backup_name,
const Context & context,
TableLockHolder & table_lock_holder);
public:
/// Moves partition to specified Disk
@ -939,8 +956,9 @@ protected:
bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const;
/// Common part for |freezePartition()| and |freezeAll()|.
using MatcherFn = std::function<bool(const DataPartPtr &)>;
using MatcherFn = std::function<bool(const String &)>;
PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context);
PartitionCommandsResultInfo unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, const Context & context);
// Partition helpers
bool canReplacePartition(const DataPartPtr & src_part) const;
@ -1024,6 +1042,9 @@ private:
// Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable.
mutable std::set<String> query_id_set;
mutable std::mutex query_id_set_mutex;
// Get partition matcher for FREEZE / UNFREEZE queries.
MatcherFn getPartitionMatcher(const ASTPtr & partition, const Context & context) const;
};
/// RAII struct to record big parts that are submerging or emerging.

View File

@ -94,10 +94,25 @@ std::optional<PartitionCommand> PartitionCommand::parse(const ASTAlterCommand *
}
else if (command_ast->type == ASTAlterCommand::FREEZE_ALL)
{
PartitionCommand command;
command.type = PartitionCommand::FREEZE_ALL_PARTITIONS;
command.with_name = command_ast->with_name;
return command;
PartitionCommand res;
res.type = PartitionCommand::FREEZE_ALL_PARTITIONS;
res.with_name = command_ast->with_name;
return res;
}
else if (command_ast->type == ASTAlterCommand::UNFREEZE_PARTITION)
{
PartitionCommand res;
res.type = PartitionCommand::UNFREEZE_PARTITION;
res.partition = command_ast->partition;
res.with_name = command_ast->with_name;
return res;
}
else if (command_ast->type == ASTAlterCommand::UNFREEZE_ALL)
{
PartitionCommand res;
res.type = PartitionCommand::UNFREEZE_ALL_PARTITIONS;
res.with_name = command_ast->with_name;
return res;
}
else
return {};
@ -130,6 +145,10 @@ std::string PartitionCommand::typeToString() const
return "FREEZE ALL";
case PartitionCommand::Type::FREEZE_PARTITION:
return "FREEZE PARTITION";
case PartitionCommand::Type::UNFREEZE_PARTITION:
return "UNFREEZE PARTITION";
case PartitionCommand::Type::UNFREEZE_ALL_PARTITIONS:
return "UNFREEZE ALL";
case PartitionCommand::Type::REPLACE_PARTITION:
return "REPLACE PARTITION";
}

View File

@ -27,6 +27,8 @@ struct PartitionCommand
FETCH_PARTITION,
FREEZE_ALL_PARTITIONS,
FREEZE_PARTITION,
UNFREEZE_ALL_PARTITIONS,
UNFREEZE_PARTITION,
REPLACE_PARTITION,
};
@ -52,7 +54,7 @@ struct PartitionCommand
/// For FETCH PARTITION - path in ZK to the shard, from which to download the partition.
String from_zookeeper_path;
/// For FREEZE PARTITION
/// For FREEZE PARTITION and UNFREEZE
String with_name;
enum MoveDestinationType

View File

@ -0,0 +1,4 @@
<yandex>
<catboost_dynamic_library_path>/etc/clickhouse-server/model/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/etc/clickhouse-server/model/model_config.xml</models_config>
</yandex>

View File

@ -0,0 +1,8 @@
<models>
<model>
<type>catboost</type>
<name>titanic</name>
<path>/etc/clickhouse-server/model/model.bin</path>
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,36 @@
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True, main_configs=['config/models_config.xml'])
def copy_file_to_container(local_path, dist_path, container_id):
os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path))
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
copy_file_to_container(os.path.join(SCRIPT_DIR, 'model/.'), '/etc/clickhouse-server/model', node.docker_id)
node.restart_clickhouse()
yield cluster
finally:
cluster.shutdown()
def test(started_cluster):
node.query("select modelEvaluate('titanic', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")

View File

@ -137,8 +137,17 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical):
list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD
node.query("SYSTEM START MERGES s3_test")
# Wait for merges and old parts deletion
time.sleep(3)
for attempt in range(0, 10):
parts_count = node.query("SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' FORMAT Values")
if parts_count == "(1)":
break
if attempt == 9:
assert parts_count == "(1)"
time.sleep(1)
assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
assert node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)"
@ -333,3 +342,28 @@ def test_move_replace_partition_to_another_table(cluster):
for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')):
minio.remove_object(cluster.minio_bucket, obj.object_name)
def test_freeze_unfreeze(cluster):
create_table(cluster, "s3_test")
node = cluster.instances["node"]
minio = cluster.minio_client
node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup1'")
node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096)))
node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'")
node.query("TRUNCATE TABLE s3_test")
assert len(
list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
# Unfreeze single partition from backup1.
node.query("ALTER TABLE s3_test UNFREEZE PARTITION '2020-01-03' WITH NAME 'backup1'")
# Unfreeze all partitions from backup2.
node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'")
# Data should be removed from S3.
assert len(
list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD

View File

@ -29,7 +29,7 @@ ALTER MATERIALIZE TTL ['MATERIALIZE TTL'] TABLE ALTER TABLE
ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING'] TABLE ALTER TABLE
ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE
ALTER FETCH PARTITION ['FETCH PARTITION'] TABLE ALTER TABLE
ALTER FREEZE PARTITION ['FREEZE PARTITION'] TABLE ALTER TABLE
ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE
ALTER TABLE [] \N ALTER
ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW
ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW

View File

@ -16,3 +16,9 @@ ATTACH PARTITION 3 3_12_12_0 3_4_4_0
command_type partition_id part_name backup_name old_part_name
FREEZE PARTITION 7 7_8_8_0 test_01417_single_part_7
ATTACH PART 5 5_13_13_0 5_6_6_0
command_type partition_id part_name backup_name
UNFREEZE PARTITION 7 7_8_8_0 test_01417_single_part_7
command_type partition_id part_name backup_name
FREEZE PARTITION 202103 20210301_20210301_1_1_0 test_01417_single_part_old_syntax
command_type partition_id part_name backup_name
UNFREEZE PARTITION 20210301 20210301_20210301_1_1_0 test_01417_single_part_old_syntax

View File

@ -13,6 +13,11 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze;"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE table_for_freeze (key UInt64, value String) ENGINE = MergeTree() ORDER BY key PARTITION BY key % 10;"
${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze SELECT number, toString(number) from numbers(10);"
# also for old syntax
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_old_syntax;"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE table_for_freeze_old_syntax (dt Date, value String) ENGINE = MergeTree(dt, (value), 8192);"
${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze_old_syntax SELECT toDate('2021-03-01'), toString(number) from numbers(10);"
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
@ -35,5 +40,21 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7'
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table"
# Unfreeze partition
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze UNFREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# Freeze partition with old syntax
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax FREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# Unfreeze partition with old syntax
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax UNFREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# teardown
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze;"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_old_syntax;"

View File

@ -1,6 +1,6 @@
AlterQuery t1 (children 1)
ExpressionList (children 1)
AlterCommand 25 (children 1)
AlterCommand 27 (children 1)
Function equals (children 1)
ExpressionList (children 2)
Identifier date

View File

@ -31,10 +31,12 @@ Map(Date, Int32)
Map(UUID, UInt16)
{'00001192-0000-4000-8000-000000000001':1,'00001192-0000-4000-7000-000000000001':2}
0 2 1
Map(Int128, Int32)
Map(Int128, String)
{-1:'a',0:'b',1234567898765432193024000:'c',-1234567898765432193024000:'d'}
a b c d
a
b
a b c d
b b b b

View File

@ -11,7 +11,7 @@ SELECT 'Map(Int8, Int8)';
SELECT m FROM table_map_with_key_integer;
SELECT m[127], m[1], m[0], m[-1] FROM table_map_with_key_integer;
SELECT m[toInt8(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(5) AS number;
SELECT m[toInt8(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3, 4] AS number;
SELECT count() FROM table_map_with_key_integer WHERE m = map();
@ -26,7 +26,7 @@ SELECT 'Map(Int32, UInt16)';
SELECT m FROM table_map_with_key_integer;
SELECT m[-1], m[2147483647], m[-2147483648] FROM table_map_with_key_integer;
SELECT m[toInt32(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(5) AS number;
SELECT m[toInt32(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3, 4] AS number;
DROP TABLE IF EXISTS table_map_with_key_integer;
@ -39,7 +39,7 @@ SELECT 'Map(Date, Int32)';
SELECT m FROM table_map_with_key_integer;
SELECT m[toDate('2020-01-01')], m[toDate('2020-01-02')], m[toDate('2020-01-03')] FROM table_map_with_key_integer;
SELECT m[toDate(number)] FROM table_map_with_key_integer ARRAY JOIN range(3) AS number;
SELECT m[toDate(number)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2] AS number;
DROP TABLE IF EXISTS table_map_with_key_integer;
@ -51,12 +51,14 @@ INSERT INTO table_map_with_key_integer VALUES ('2020-01-01', map('00001192-0000-
SELECT 'Map(UUID, UInt16)';
SELECT m FROM table_map_with_key_integer;
SELECT
m[toUUID('00001192-0000-4000-6000-000000000001')],
m[toUUID('00001192-0000-4000-7000-000000000001')],
SELECT
m[toUUID('00001192-0000-4000-6000-000000000001')],
m[toUUID('00001192-0000-4000-7000-000000000001')],
m[toUUID('00001192-0000-4000-8000-000000000001')]
FROM table_map_with_key_integer;
SELECT m[257], m[1] FROM table_map_with_key_integer; -- { serverError 43 }
DROP TABLE IF EXISTS table_map_with_key_integer;
CREATE TABLE table_map_with_key_integer (d DATE, m Map(Int128, String))
@ -65,11 +67,14 @@ ENGINE = MergeTree() ORDER BY d;
INSERT INTO table_map_with_key_integer SELECT '2020-01-01', map(-1, 'a', 0, 'b', toInt128(1234567898765432123456789), 'c', toInt128(-1234567898765432123456789), 'd');
SELECT 'Map(Int128, Int32)';
SELECT 'Map(Int128, String)';
SELECT m FROM table_map_with_key_integer;
SELECT m[toInt128(-1)], m[toInt128(0)], m[toInt128(1234567898765432123456789)], m[toInt128(-1234567898765432123456789)] FROM table_map_with_key_integer;
SELECT m[toInt128(number - 2)] FROM table_map_with_key_integer ARRAY JOIN range(4) AS number;
SELECT m[toInt128(number - 2)] FROM table_map_with_key_integer ARRAY JOIN [0, 1, 2, 3] AS number;
SELECT m[-1], m[0], m[toInt128(1234567898765432123456789)], m[toInt128(-1234567898765432123456789)] FROM table_map_with_key_integer;
SELECT m[toUInt64(0)], m[toInt64(0)], m[toUInt8(0)], m[toUInt16(0)] FROM table_map_with_key_integer;
DROP TABLE IF EXISTS table_map_with_key_integer;

View File

@ -66,7 +66,7 @@
</div>
<div class="row mb-3">
<div class="col w-100">
<a href="https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg"
<a href="https://join.slack.com/t/clickhousedb/shared_invite/zt-nwwakmk4-xOJ6cdy0sJC3It8j348~IA"
rel="external nofollow noreferrer" target="_blank" class="text-decoration-none">
<div class="bg-dark p-4">
<img data-src="/images/index/slack.svg"