Merge branch 'master' of github.com:ClickHouse/ClickHouse into poco-file-to-std-fs

Commit ee06936596 by kssenii, 2021-05-01 17:24:31 +03:00
179 changed files with 2256 additions and 1136 deletions

.gitignore

@@ -27,6 +27,7 @@
 /docs/zh/single.md
 /docs/ja/single.md
 /docs/fa/single.md
+/docs/en/development/cmake-in-clickhouse.md
 # callgrind files
 callgrind.out.*

@@ -159,17 +159,12 @@ void IBridge::initialize(Application & self)
     if (port > 0xFFFF)
         throw Exception("Out of range 'http-port': " + std::to_string(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND);

-    http_timeout = config().getUInt("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
+    http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
     max_server_connections = config().getUInt("max-server-connections", 1024);
-    keep_alive_timeout = config().getUInt("keep-alive-timeout", 10);
+    keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);

     initializeTerminationAndSignalProcessing();

-#if USE_ODBC
-    if (bridgeName() == "ODBCBridge")
-        Poco::Data::ODBC::Connector::registerConnector();
-#endif
-
     ServerApplication::initialize(self); // NOLINT
 }
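
A minimal illustration of why the two reads above moved from getUInt() to getUInt64(): Poco's getUInt() parses into a 32-bit unsigned int, so a value destined for a 64-bit field gets narrowed on the way. The sketch below uses Poco::Util::MapConfiguration as a stand-in for the bridge's real configuration; the key name comes from the diff, everything else is illustrative.

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/Util/MapConfiguration.h>
#include <cstdint>
#include <cstdio>

int main()
{
    Poco::AutoPtr<Poco::Util::MapConfiguration> config = new Poco::Util::MapConfiguration;
    config->setString("http-timeout", "1800");

    /// getUInt64() parses straight into 64 bits, so no narrowing can occur
    /// between the parsed value and a 64-bit timeout field.
    uint64_t http_timeout = config->getUInt64("http-timeout", 5);
    std::printf("http-timeout = %llu\n", static_cast<unsigned long long>(http_timeout));
    return 0;
}
```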

@@ -25,6 +25,10 @@ uint64_t getThreadId()
     current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
 #elif defined(OS_FREEBSD)
     current_tid = pthread_getthreadid_np();
+#elif defined(OS_SUNOS)
+    // On Solaris-derived systems, this returns the ID of the LWP, analogous
+    // to a thread.
+    current_tid = static_cast<uint64_t>(pthread_self());
 #else
     if (0 != pthread_threadid_np(nullptr, &current_tid))
         throw std::logic_error("pthread_threadid_np returned error");
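
For context, the function around this hunk memoizes the OS call in a `thread_local`. A minimal sketch of that pattern, using the new Solaris branch as a portable stand-in for the full per-OS dispatch (assumption: on your platform `pthread_self()` is convertible to an integer, as on Linux and Solaris-derived systems):

```cpp
#include <cstdint>
#include <pthread.h>

static thread_local uint64_t current_tid = 0;

uint64_t getThreadIdSketch()
{
    if (!current_tid)
        /// One OS call per thread; on Solaris-derived systems pthread_self()
        /// returns the LWP id, which is analogous to a thread id.
        current_tid = static_cast<uint64_t>(pthread_self());
    return current_tid;
}
```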

@@ -2,7 +2,7 @@
 #include <time.h>

-#if defined (OS_DARWIN)
+#if defined (OS_DARWIN) || defined (OS_SUNOS)
 #    define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
 #elif defined (OS_FREEBSD)
 #    define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC_FAST
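
With these fallback defines in place, callers can request `CLOCK_MONOTONIC_COARSE` unconditionally and get the cheapest monotonic clock each platform offers. A minimal sketch of such a caller (the guard mirrors the header above so the snippet compiles standalone):

```cpp
#include <cstdio>
#include <time.h>

#if !defined(CLOCK_MONOTONIC_COARSE)
#    define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC /// same fallback as above
#endif

int main()
{
    timespec ts{};
    if (0 == clock_gettime(CLOCK_MONOTONIC_COARSE, &ts))
        std::printf("%lld.%09ld\n", static_cast<long long>(ts.tv_sec), ts.tv_nsec);
    return 0;
}
```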

@@ -13,7 +13,12 @@ using char8_t = unsigned char;
 #endif

+/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
+#if !defined(PVS_STUDIO) /// But PVS-Studio does not treat it correctly.
+using UInt8 = char8_t;
+#else
 using UInt8 = uint8_t;
+#endif

 using UInt16 = uint16_t;
 using UInt32 = uint32_t;
 using UInt64 = uint64_t;
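
The "more strict aliasing" comment is about optimization: `uint8_t` is normally an alias of `unsigned char`, a type the compiler must assume can alias any object, whereas `char8_t` is a distinct type without that exemption. A minimal sketch of the difference, based on my reading of the linked godbolt/Stack Overflow discussion rather than on code from this file:

```cpp
#include <cstddef>
#include <cstdint>

using UInt8 = char8_t; /// distinct type: no char-like aliasing exemption

/// With uint8_t (= unsigned char), the compiler must assume data[i] may
/// alias *sum and reload *sum every iteration; with char8_t it may keep
/// the accumulator in a register for the whole loop.
void accumulate(const UInt8 * data, size_t n, uint64_t * sum)
{
    for (size_t i = 0; i < n; ++i)
        *sum += data[i];
}
```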

@@ -35,7 +35,7 @@ PEERDIR(
 CFLAGS(-g0)

 SRCS(
-    <? find . -name '*.cpp' | grep -v -F tests/ | grep -v -F Replxx | grep -v -F Readline | sed 's/^\.\// /' | sort ?>
+    <? find . -name '*.cpp' | grep -v -F tests/ | grep -v -F examples | grep -v -F Replxx | grep -v -F Readline | sed 's/^\.\// /' | sort ?>
 )

 END()

@@ -1643,22 +1643,22 @@ typedef setseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
 template <bitcount_t table_pow2, bitcount_t advance_pow2,
           typename BaseRNG, bool kdd = true>
-using ext_std8 = extended<table_pow2, advance_pow2, BaseRNG,
+using ext_std8 = pcg_detail::extended<table_pow2, advance_pow2, BaseRNG,
                           oneseq_rxs_m_xs_8_8, kdd>;

 template <bitcount_t table_pow2, bitcount_t advance_pow2,
           typename BaseRNG, bool kdd = true>
-using ext_std16 = extended<table_pow2, advance_pow2, BaseRNG,
+using ext_std16 = pcg_detail::extended<table_pow2, advance_pow2, BaseRNG,
                            oneseq_rxs_m_xs_16_16, kdd>;

 template <bitcount_t table_pow2, bitcount_t advance_pow2,
           typename BaseRNG, bool kdd = true>
-using ext_std32 = extended<table_pow2, advance_pow2, BaseRNG,
+using ext_std32 = pcg_detail::extended<table_pow2, advance_pow2, BaseRNG,
                            oneseq_rxs_m_xs_32_32, kdd>;

 template <bitcount_t table_pow2, bitcount_t advance_pow2,
           typename BaseRNG, bool kdd = true>
-using ext_std64 = extended<table_pow2, advance_pow2, BaseRNG,
+using ext_std64 = pcg_detail::extended<table_pow2, advance_pow2, BaseRNG,
                            oneseq_rxs_m_xs_64_64, kdd>;
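
The change above only qualifies a name; behaviour is unchanged. A standalone sketch of one way such qualification becomes necessary (illustrative namespaces, not pcg's real layout): an alias template gets no special lookup into a sibling namespace, so the defining namespace must be spelled out.

```cpp
namespace detail_demo
{
    template <int table_pow2> struct extended { };
}

namespace engines_demo
{
    /// template <int N> using ext = extended<N>;           // error: 'extended' not found here
    template <int N> using ext = detail_demo::extended<N>;  /// OK: explicitly qualified
}

int main()
{
    engines_demo::ext<8> e;
    (void)e;
    return 0;
}
```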

@@ -24,9 +24,9 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt")
 endif ()

 if (NOT USE_INTERNAL_LLVM_LIBRARY)
-    set (LLVM_PATHS "/usr/local/lib/llvm")
+    set (LLVM_PATHS "/usr/local/lib/llvm" "/usr/lib/llvm")
-    foreach(llvm_v 10 9 8)
+    foreach(llvm_v 11.1 11)
         if (NOT LLVM_FOUND)
             find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS})
         endif ()
@@ -102,7 +102,6 @@ LLVMRuntimeDyld
 LLVMX86CodeGen
 LLVMX86Desc
 LLVMX86Info
-LLVMX86Utils
 LLVMAsmPrinter
 LLVMDebugInfoDWARF
 LLVMGlobalISel

@@ -40,7 +40,7 @@ if (SANITIZE)
         # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to
         # keep the binary size down.
         # TODO: try compiling with -Og and with ld.gold.
-        set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
+        set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
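
A minimal sketch, not taken from the repository, of the bug class that the newly added `-fsanitize-memory-use-after-dtor` flag detects: MSan poisons an object's storage once its destructor has run, so a later read of a member is reported (at run time this also needs `MSAN_OPTIONS=poison_in_dtor=1`, which the test image below now sets):

```cpp
#include <new>

struct Widget
{
    int value = 42;
};

int readAfterDestroy()
{
    alignas(Widget) unsigned char storage[sizeof(Widget)];
    Widget * w = new (storage) Widget;
    w->~Widget();     /// MSan poisons the object's bytes here.
    return w->value;  /// use-after-dtor: reading poisoned memory is reported.
}
```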

@@ -12,6 +12,9 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
 elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
     set (OS_DARWIN 1)
     add_definitions(-D OS_DARWIN)
+elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS")
+    set (OS_SUNOS 1)
+    add_definitions(-D OS_SUNOS)
 endif ()

 if (CMAKE_CROSSCOMPILING)

contrib/boost

@@ -1 +1 @@
-Subproject commit 9f0ff347e50429686604002d8ad1fd07515c4f31
+Subproject commit 1ccbb5a522a571ce83b606dbc2e1011c42ecccfb

contrib/llvm

@@ -1 +1 @@
-Subproject commit 8f24d507c1cfeec66d27f48fe74518fd278e2d25
+Subproject commit cfaf365cf96918999d09d976ec736b4518cf5d02

@@ -3,10 +3,10 @@ compilers and build settings. Correctly configured Docker daemon is single depen
 Usage:

-Build deb package with `gcc-9` in `debug` mode:
+Build deb package with `clang-11` in `debug` mode:
 ```
 $ mkdir deb/test_output
-$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-9 --build-type=debug
+$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-11 --build-type=debug
 $ ls -l deb/test_output
 -rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb
 -rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb
@@ -18,11 +18,11 @@ $ ls -l deb/test_output
 ```

-Build ClickHouse binary with `clang-10` and `address` sanitizer in `relwithdebuginfo`
+Build ClickHouse binary with `clang-11` and `address` sanitizer in `relwithdebuginfo`
 mode:
 ```
 $ mkdir $HOME/some_clickhouse
-$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-10 --sanitizer=address
+$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-11 --sanitizer=address
 $ ls -l $HOME/some_clickhouse
 -rwxr-xr-x 1 root root 787061952 clickhouse
 lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse

@@ -37,23 +37,18 @@ RUN apt-get update \
             bash \
             build-essential \
             ccache \
-            clang-10 \
+            clang-11 \
-            clang-tidy-10 \
+            clang-tidy-11 \
             cmake \
             curl \
-            g++-9 \
-            gcc-9 \
+            g++-10 \
+            gcc-10 \
             gdb \
             git \
             gperf \
             libicu-dev \
             libreadline-dev \
-            lld-10 \
+            lld-11 \
-            llvm-10 \
-            llvm-10-dev \
+            llvm-11 \
+            llvm-11-dev \
             moreutils \

@@ -35,22 +35,15 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
 RUN apt-get update \
     && apt-get install \
             alien \
-            clang-10 \
+            clang-11 \
-            clang-tidy-10 \
+            clang-tidy-11 \
             cmake \
             debhelper \
             devscripts \
-            g++-9 \
-            gcc-9 \
             gdb \
             git \
             gperf \
-            lld-10 \
+            lld-11 \
-            llvm-10 \
-            llvm-10-dev \
+            llvm-11 \
+            llvm-11-dev \
             moreutils \
@@ -68,7 +61,7 @@ RUN apt-get update \
 RUN echo 'deb http://archive.ubuntu.com/ubuntu/ focal-proposed restricted main multiverse universe' > /etc/apt/sources.list.d/proposed-repositories.list

 RUN apt-get update \
-    && apt-get install gcc-10 g++-10 --yes --no-install-recommends
+    && apt-get install gcc-10 g++-10 --yes

 RUN rm /etc/apt/sources.list.d/proposed-repositories.list && apt-get update

@@ -181,9 +181,8 @@ if __name__ == "__main__":
     parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
     parser.add_argument("--output-dir", required=True)
     parser.add_argument("--build-type", choices=("debug", ""), default="")
-    parser.add_argument("--compiler", choices=("clang-10", "clang-10-darwin", "clang-10-aarch64", "clang-10-freebsd",
-                                               "clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
-                                               "gcc-9", "gcc-10"), default="gcc-9")
+    parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
+                                               "gcc-10"), default="clang-11")
     parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
     parser.add_argument("--unbundled", action="store_true")
     parser.add_argument("--split-binary", action="store_true")

@@ -64,6 +64,8 @@ RUN groupadd -r clickhouse --gid=101 \
         clickhouse-client=$version \
         clickhouse-server=$version ; \
     fi \
+    && wget --progress=bar:force:noscroll "https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-$(dpkg --print-architecture)" -O /bin/gosu \
+    && chmod +x /bin/gosu \
     && clickhouse-local -q 'SELECT * FROM system.build_options' \
     && rm -rf \
         /var/lib/apt/lists/* \
@@ -76,8 +78,6 @@ RUN groupadd -r clickhouse --gid=101 \
 # we need to allow "others" access to clickhouse folder, because docker container
 # can be started with arbitrary uid (openshift usecase)

-ADD https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-amd64 /bin/gosu
-
 RUN locale-gen en_US.UTF-8
 ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US:en
@@ -88,10 +88,7 @@ RUN mkdir /docker-entrypoint-initdb.d
 COPY docker_related_config.xml /etc/clickhouse-server/config.d/
 COPY entrypoint.sh /entrypoint.sh
-RUN chmod +x \
-    /entrypoint.sh \
-    /bin/gosu
+RUN chmod +x /entrypoint.sh

 EXPOSE 9000 8123 9009
 VOLUME /var/lib/clickhouse

@@ -51,13 +51,13 @@ RUN apt-get update \
 # Sanitizer options for services (clickhouse-server)
 RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
     echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
-    echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
+    echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \
     echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
     ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
 # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
 # (but w/o verbosity for TSAN, otherwise test.reference will not match)
 ENV TSAN_OPTIONS='halt_on_error=1 history_size=7'
 ENV UBSAN_OPTIONS='print_stacktrace=1'
-ENV MSAN_OPTIONS='abort_on_error=1'
+ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'

 CMD sleep 1

@@ -73,9 +73,9 @@ The build requires the following components:

 - Git (is used only to checkout the sources, it's not needed for the build)
 - CMake 3.10 or newer
-- Ninja (recommended) or Make
-- C++ compiler: gcc 10 or clang 8 or newer
-- Linker: lld or gold (the classic GNU ld won't work)
+- Ninja
+- C++ compiler: clang-11 or newer
+- Linker: lld
 - Python (is only used inside LLVM build and it is optional)

 If all the components are installed, you may build in the same way as the steps above.

@@ -83,7 +83,7 @@ If all the components are installed, you may build in the same way as the steps
 Example for Ubuntu Eoan:
 ``` bash
 sudo apt update
-sudo apt install git cmake ninja-build g++ python
+sudo apt install git cmake ninja-build clang++ python
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build && cd build
 cmake ../ClickHouse
@@ -92,7 +92,7 @@ ninja
 Example for OpenSUSE Tumbleweed:
 ``` bash
-sudo zypper install git cmake ninja gcc-c++ python lld
+sudo zypper install git cmake ninja clang-c++ python lld
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build && cd build
 cmake ../ClickHouse
@@ -102,7 +102,7 @@ ninja
 Example for Fedora Rawhide:
 ``` bash
 sudo yum update
-yum --nogpg install git cmake make gcc-c++ python3
+yum --nogpg install git cmake make clang-c++ python3
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build && cd build
 cmake ../ClickHouse

@@ -1,288 +0,0 @@
# CMake in ClickHouse
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
```cmake
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake files types
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
4. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`).
## List of CMake flags
* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py).
* The flag name is a link to its position in the code.
* If an option's default value is itself an option, it's also a link to its position in this list.
### ClickHouse modes
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-clickhouse-all"></a>[`ENABLE_CLICKHOUSE_ALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L8) | `ON` | Enable all ClickHouse modes by default | The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
| <a name="enable-clickhouse-benchmark"></a>[`ENABLE_CLICKHOUSE_BENCHMARK`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L18) | `ENABLE_CLICKHOUSE_ALL` | Queries benchmarking mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/ |
| <a name="enable-clickhouse-client"></a>[`ENABLE_CLICKHOUSE_CLIENT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L11) | `ENABLE_CLICKHOUSE_ALL` | Client mode (interactive tui/shell that connects to the server) | |
| <a name="enable-clickhouse-compressor"></a>[`ENABLE_CLICKHOUSE_COMPRESSOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L23) | `ENABLE_CLICKHOUSE_ALL` | Data compressor and decompressor | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/ |
| <a name="enable-clickhouse-copier"></a>[`ENABLE_CLICKHOUSE_COPIER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L26) | `ENABLE_CLICKHOUSE_ALL` | Inter-cluster data copying mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/ |
| <a name="enable-clickhouse-extract-from-config"></a>[`ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20) | `ENABLE_CLICKHOUSE_ALL` | Configs processor (extract values etc.) | |
| <a name="enable-clickhouse-format"></a>[`ENABLE_CLICKHOUSE_FORMAT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28) | `ENABLE_CLICKHOUSE_ALL` | Queries pretty-printer and formatter with syntax highlighting | |
| <a name="enable-clickhouse-git-import"></a>[`ENABLE_CLICKHOUSE_GIT_IMPORT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L47) | `ENABLE_CLICKHOUSE_ALL` | A tool to analyze Git repositories | https://presentations.clickhouse.tech/matemarketing_2020/ |
| <a name="enable-clickhouse-install"></a>[`ENABLE_CLICKHOUSE_INSTALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L51) | `OFF` | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
| <a name="enable-clickhouse-library-bridge"></a>[`ENABLE_CLICKHOUSE_LIBRARY_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L43) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to Library dictionary source | |
| <a name="enable-clickhouse-local"></a>[`ENABLE_CLICKHOUSE_LOCAL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L15) | `ENABLE_CLICKHOUSE_ALL` | Local files fast processing mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/ |
| <a name="enable-clickhouse-obfuscator"></a>[`ENABLE_CLICKHOUSE_OBFUSCATOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L32) | `ENABLE_CLICKHOUSE_ALL` | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/ |
| <a name="enable-clickhouse-odbc-bridge"></a>[`ENABLE_CLICKHOUSE_ODBC_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L37) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to ODBC driver | |
| <a name="enable-clickhouse-server"></a>[`ENABLE_CLICKHOUSE_SERVER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10) | `ENABLE_CLICKHOUSE_ALL` | Server mode (main mode) | |
### External libraries
Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="enable-amqpcpp"></a>[`ENABLE_AMQPCPP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/amqpcpp.cmake#L6) | `ENABLE_LIBRARIES` | Enalbe AMQP-CPP | |
| <a name="enable-avro"></a>[`ENABLE_AVRO`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L2) | `ENABLE_LIBRARIES` | Enable Avro | Needed when using Apache Avro serialization format |
| <a name="enable-base"></a>[`ENABLE_BASE64`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/base64.cmake#L2) | `ENABLE_LIBRARIES` | Enable base64 | |
| <a name="enable-brotli"></a>[`ENABLE_BROTLI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L1) | `ENABLE_LIBRARIES` | Enable brotli | |
| <a name="enable-capnp"></a>[`ENABLE_CAPNP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cap'n Proto | |
| <a name="enable-cassandra"></a>[`ENABLE_CASSANDRA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cassandra.cmake#L6) | `ENABLE_LIBRARIES` | Enable Cassandra | |
| <a name="enable-ccache"></a>[`ENABLE_CCACHE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ccache.cmake#L22) | `ENABLE_CCACHE_BY_DEFAULT` | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
| <a name="enable-clang-tidy"></a>[`ENABLE_CLANG_TIDY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2) | `OFF` | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
| <a name="enable-curl"></a>[`ENABLE_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L1) | `ENABLE_LIBRARIES` | Enable curl | |
| <a name="enable-datasketches"></a>[`ENABLE_DATASKETCHES`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/datasketches.cmake#L1) | `ENABLE_LIBRARIES` | Enable DataSketches | |
| <a name="enable-embedded-compiler"></a>[`ENABLE_EMBEDDED_COMPILER`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L5) | `ENABLE_LIBRARIES` | Set to TRUE to enable support for 'compile_expressions' option for query execution | |
| <a name="enable-fastops"></a>[`ENABLE_FASTOPS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/fastops.cmake#L2) | `ENABLE_LIBRARIES` | Enable fast vectorized mathematical functions library by Mikhail Parakhin | |
| <a name="enable-gperf"></a>[`ENABLE_GPERF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gperf.cmake#L5) | `ENABLE_LIBRARIES` | Use gperf function hash generator tool | |
| <a name="enable-grpc"></a>[`ENABLE_GRPC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L8) | `ENABLE_GRPC_DEFAULT` | Use gRPC | |
| <a name="enable-gsasl-library"></a>[`ENABLE_GSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L1) | `ENABLE_LIBRARIES` | Enable gsasl library | |
| <a name="enable-h"></a>[`ENABLE_H3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L1) | `ENABLE_LIBRARIES` | Enable H3 | |
| <a name="enable-hdfs"></a>[`ENABLE_HDFS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L2) | `ENABLE_LIBRARIES` | Enable HDFS | |
| <a name="enable-icu"></a>[`ENABLE_ICU`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L2) | `ENABLE_LIBRARIES` | Enable ICU | |
| <a name="enable-ldap"></a>[`ENABLE_LDAP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L5) | `ENABLE_LIBRARIES` | Enable LDAP | |
| <a name="enable-libpqxx"></a>[`ENABLE_LIBPQXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libpqxx.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe libpqxx | |
| <a name="enable-msgpack"></a>[`ENABLE_MSGPACK`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L1) | `ENABLE_LIBRARIES` | Enable msgpack library | |
| <a name="enable-mysql"></a>[`ENABLE_MYSQL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L2) | `ENABLE_LIBRARIES` | Enable MySQL | |
| <a name="enable-nuraft"></a>[`ENABLE_NURAFT`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/nuraft.cmake#L1) | `ENABLE_LIBRARIES` | Enable NuRaft | |
| <a name="enable-odbc"></a>[`ENABLE_ODBC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ODBC library | |
| <a name="enable-orc"></a>[`ENABLE_ORC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ORC | |
| <a name="enable-parquet"></a>[`ENABLE_PARQUET`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L2) | `ENABLE_LIBRARIES` | Enable parquet | |
| <a name="enable-protobuf"></a>[`ENABLE_PROTOBUF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L1) | `ENABLE_LIBRARIES` | Enable protobuf | |
| <a name="enable-rapidjson"></a>[`ENABLE_RAPIDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L1) | `ENABLE_LIBRARIES` | Use rapidjson | |
| <a name="enable-rdkafka"></a>[`ENABLE_RDKAFKA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L1) | `ENABLE_LIBRARIES` | Enable kafka | |
| <a name="enable-rocksdb"></a>[`ENABLE_ROCKSDB`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L5) | `ENABLE_LIBRARIES` | Enable ROCKSDB | |
| <a name="enable-s"></a>[`ENABLE_S3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L2) | `ENABLE_LIBRARIES` | Enable S3 | |
| <a name="enable-ssl"></a>[`ENABLE_SSL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L3) | `ENABLE_LIBRARIES` | Enable ssl | Needed when securely connecting to an external server, e.g. clickhouse-client --host ... --secure |
| <a name="enable-stats"></a>[`ENABLE_STATS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/stats.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe StatsLib library | |
### External libraries system/bundled mode
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="use-internal-avro-library"></a>[`USE_INTERNAL_AVRO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L11) | `ON` | Set to FALSE to use system avro library instead of bundled | |
| <a name="use-internal-aws-s-library"></a>[`USE_INTERNAL_AWS_S3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L14) | `ON` | Set to FALSE to use system S3 instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-brotli-library"></a>[`USE_INTERNAL_BROTLI_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libbrotli library instead of bundled | Many system ship only dynamic brotly libraries, so we back off to bundled by default |
| <a name="use-internal-capnp-library"></a>[`USE_INTERNAL_CAPNP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system capnproto library instead of bundled | |
| <a name="use-internal-curl"></a>[`USE_INTERNAL_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L10) | `NOT_UNBUNDLED` | Use internal curl library | |
| <a name="use-internal-datasketches-library"></a>[`USE_INTERNAL_DATASKETCHES_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/datasketches.cmake#L5) | `NOT_UNBUNDLED` | Set to FALSE to use system DataSketches library instead of bundled | |
| <a name="use-internal-grpc-library"></a>[`USE_INTERNAL_GRPC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L25) | `NOT_UNBUNDLED` | Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal gRPC framework. You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. The external gRPC framework can be installed in the system by running sudo apt-get install libgrpc++-dev protobuf-compiler-grpc |
| <a name="use-internal-gtest-library"></a>[`USE_INTERNAL_GTEST_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gtest.cmake#L3) | `NOT_UNBUNDLED` | Set to FALSE to use system Google Test instead of bundled | |
| <a name="use-internal-h-library"></a>[`USE_INTERNAL_H3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L9) | `ON` | Set to FALSE to use system h3 library instead of bundled | |
| <a name="use-internal-hdfs-library"></a>[`USE_INTERNAL_HDFS3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L14) | `ON` | Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk) | |
| <a name="use-internal-icu-library"></a>[`USE_INTERNAL_ICU_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system ICU library instead of bundled | |
| <a name="use-internal-ldap-library"></a>[`USE_INTERNAL_LDAP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system *LDAP library instead of bundled | |
| <a name="use-internal-libcxx-library"></a>[`USE_INTERNAL_LIBCXX_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L15) | `USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT` | Disable to use system libcxx and libcxxabi libraries instead of bundled | |
| <a name="use-internal-libgsasl-library"></a>[`USE_INTERNAL_LIBGSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libgsasl library instead of bundled | when USE_STATIC_LIBRARIES we usually need to pick up hell a lot of dependencies for libgsasl |
| <a name="use-internal-libxml-library"></a>[`USE_INTERNAL_LIBXML2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libxml2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system libxml2 library instead of bundled | |
| <a name="use-internal-llvm-library"></a>[`USE_INTERNAL_LLVM_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L8) | `NOT_UNBUNDLED` | Use bundled or system LLVM library. | |
| <a name="use-internal-msgpack-library"></a>[`USE_INTERNAL_MSGPACK_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system msgpack library instead of bundled | |
| <a name="use-internal-mysql-library"></a>[`USE_INTERNAL_MYSQL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system mysqlclient library instead of bundled | |
| <a name="use-internal-odbc-library"></a>[`USE_INTERNAL_ODBC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L22) | `NOT_UNBUNDLED` | Use internal ODBC library | |
| <a name="use-internal-orc-library"></a>[`USE_INTERNAL_ORC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L11) | `ON` | Set to FALSE to use system ORC instead of bundled (experimental set to OFF on your own risk) | |
| <a name="use-internal-parquet-library"></a>[`USE_INTERNAL_PARQUET_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L16) | `NOT_UNBUNDLED` | Set to FALSE to use system parquet library instead of bundled | |
| <a name="use-internal-poco-library"></a>[`USE_INTERNAL_POCO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/poco.cmake#L1) | `ON` | Use internal Poco library | |
| <a name="use-internal-protobuf-library"></a>[`USE_INTERNAL_PROTOBUF_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal protobuf library. You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. The external protobuf library can be installed in the system by running sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev |
| <a name="use-internal-rapidjson-library"></a>[`USE_INTERNAL_RAPIDJSON_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L9) | `NOT_UNBUNDLED` | Set to FALSE to use system rapidjson library instead of bundled | |
| <a name="use-internal-rdkafka-library"></a>[`USE_INTERNAL_RDKAFKA_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system librdkafka instead of the bundled | |
| <a name="use-internal-re-library"></a>[`USE_INTERNAL_RE2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/re2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system re2 library instead of bundled [slower] | |
| <a name="use-internal-rocksdb-library"></a>[`USE_INTERNAL_ROCKSDB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system ROCKSDB library instead of bundled | |
| <a name="use-internal-snappy-library"></a>[`USE_INTERNAL_SNAPPY_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system snappy library instead of bundled | |
| <a name="use-internal-sparsehash-library"></a>[`USE_INTERNAL_SPARSEHASH_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sparsehash.cmake#L1) | `ON` | Set to FALSE to use system sparsehash library instead of bundled | |
| <a name="use-internal-ssl-library"></a>[`USE_INTERNAL_SSL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L12) | `NOT_UNBUNDLED` | Set to FALSE to use system *ssl library instead of bundled | |
| <a name="use-internal-xz-library"></a>[`USE_INTERNAL_XZ_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/xz.cmake#L1) | `NOT_UNBUNDLED` | Set to OFF to use system xz (lzma) library instead of bundled | |
| <a name="use-internal-zlib-library"></a>[`USE_INTERNAL_ZLIB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zlib.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zlib library instead of bundled | |
| <a name="use-internal-zstd-library"></a>[`USE_INTERNAL_ZSTD_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zstd.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zstd library instead of bundled | |
### Other flags
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
| <a name="add-gdb-index-for-gold"></a>[`ADD_GDB_INDEX_FOR_GOLD`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L209) | `OFF` | Add .gdb-index to resulting binaries for gold linker. | Ignored if `lld` is used |
| <a name="arch-native"></a>[`ARCH_NATIVE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L260) | `OFF` | Add -march=native compiler flag | |
| <a name="clickhouse-split-binary"></a>[`CLICKHOUSE_SPLIT_BINARY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L113) | `OFF` | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
| <a name="compiler-pipe"></a>[`COMPILER_PIPE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L249) | `ON` | -pipe compiler option | Less `/tmp` usage, more RAM usage. |
| <a name="enable-check-heavy-builds"></a>[`ENABLE_CHECK_HEAVY_BUILDS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L80) | `OFF` | Don't allow C++ translation units to compile too long or to take too much memory while compiling. | Take care to add prlimit in command line before ccache, or else ccache thinks that prlimit is compiler, and clang++ is its input file, and refuses to work with multiple inputs, e.g in ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp Another way would be to use --ccache-skip option before clang++-11 to make ccache ignore it. |
| <a name="enable-fuzzing"></a>[`ENABLE_FUZZING`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L130) | `OFF` | Fuzzy testing using libfuzzer | Implies `WITH_COVERAGE` |
| <a name="enable-libraries"></a>[`ENABLE_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L383) | `ON` | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
| <a name="enable-multitarget-code"></a>[`ENABLE_MULTITARGET_CODE`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L102) | `ON` | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. `ifdef ENABLE_MULTITARGET`). If turned ON, this option defines such macro. See `src/Functions/TargetSpecific.h` |
| <a name="enable-tests"></a>[`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L169) | `ON` | Provide unit_test_dbms target with Google.Test unit tests | If turned `ON`, assumes the user has either the system GTest library or the bundled one. |
| <a name="enable-thinlto"></a>[`ENABLE_THINLTO`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L339) | `ON` | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
| <a name="fail-on-unsupported-options-combination"></a>[`FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32) | `ON` | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
| <a name="glibc-compatibility"></a>[`GLIBC_COMPATIBILITY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L173) | `ON` | Enable compatibility with older glibc libraries. | Only for Linux, x86_64. |
| <a name="linker-name"></a>[`LINKER_NAME`](https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L44) | `OFF` | Linker name or full path | Example values: `lld-10`, `gold`. |
| <a name="llvm-has-rtti"></a>[`LLVM_HAS_RTTI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L40) | `ON` | Enable if LLVM was build with RTTI enabled | |
| <a name="make-static-libraries"></a>[`MAKE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L106) | `USE_STATIC_LIBRARIES` | Disable to make shared libraries | |
| <a name="parallel-compile-jobs"></a>[`PARALLEL_COMPILE_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10) | `""` | Maximum number of concurrent compilation jobs | 1 if not set |
| <a name="parallel-link-jobs"></a>[`PARALLEL_LINK_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13) | `""` | Maximum number of concurrent link jobs | 1 if not set |
| <a name="sanitize"></a>[`SANITIZE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7) | `""` | Enable one of the code sanitizers | Possible values: - `address` (ASan) - `memory` (MSan) - `thread` (TSan) - `undefined` (UBSan) - "" (no sanitizing) |
| <a name="split-shared-libraries"></a>[`SPLIT_SHARED_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L111) | `OFF` | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
| <a name="strip-debug-symbols-functions"></a>[`STRIP_DEBUG_SYMBOLS_FUNCTIONS`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L51) | `STRIP_DSF_DEFAULT` | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables)." |
| <a name="unbundled"></a>[`UNBUNDLED`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L389) | `OFF` | Use system libraries instead of ones in contrib/ | We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your system. This mode exists for enthusiastic developers who are searching for trouble. Useful for maintainers of OS packages. |
| <a name="use-include-what-you-use"></a>[`USE_INCLUDE_WHAT_YOU_USE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L444) | `OFF` | Automatically reduce unneeded includes in source code (external tool) | https://github.com/include-what-you-use/include-what-you-use |
| <a name="use-libcxx"></a>[`USE_LIBCXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L1) | `NOT_UNBUNDLED` | Use libc++ and libc++abi instead of libstdc++ | |
| <a name="use-sentry"></a>[`USE_SENTRY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sentry.cmake#L13) | `ENABLE_LIBRARIES` | Use Sentry | |
| <a name="use-simdjson"></a>[`USE_SIMDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/simdjson.cmake#L1) | `ENABLE_LIBRARIES` | Use simdjson | |
| <a name="use-snappy"></a>[`USE_SNAPPY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L1) | `ENABLE_LIBRARIES` | Enable snappy library | |
| <a name="use-static-libraries"></a>[`USE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L105) | `ON` | Disable to use shared libraries | |
| <a name="use-unwind"></a>[`USE_UNWIND`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/unwind.cmake#L1) | `ENABLE_LIBRARIES` | Enable libunwind (better stacktraces) | |
| <a name="werror"></a>[`WERROR`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L399) | `OFF` | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
| <a name="weverything"></a>[`WEVERYTHING`](https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L17) | `ON` | Enable -Weverything option with some exceptions. | Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only |
| <a name="with-coverage"></a>[`WITH_COVERAGE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L300) | `OFF` | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
## Developer's guide for adding new CMake options
### Don't be obvious. Be informative.
Bad:
```cmake
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless, as it neither gives the viewer any additional information nor explains the option's purpose.
Better:
```cmake
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed from its name, or the guess may be misleading, or the option has some
pre-conditions, leave a comment above the `option()` line and explain what it does.
The best way is to link the docs page (if it exists).
The comment is parsed into a separate column (see below).
Even better:
```cmake
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that they may be doing something wrong.
Also, such options should be disabled by default where that applies.
Bad:
```cmake
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions.
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```cmake
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables).
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment.
You may find that the option's name is self-descriptive.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```cmake
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```cmake
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
Another example, bad:
```cmake
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```cmake
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`.
Prefer the `ON/OFF` values, if possible.

@@ -102,7 +102,9 @@ For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds
 - [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
 - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`

-After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub.
+After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.
+Run `sudo ./clickhouse install` if you want to install clickhouse system-wide (along with the needed configuration files and configured users). After that, run `clickhouse start` to start the clickhouse-server and `clickhouse-client` to connect to it.

 These builds are not recommended for use in production environments because they are less thoroughly tested, but you can use them at your own risk. They also have only a subset of ClickHouse features available.

@@ -2864,6 +2864,50 @@ Possible values:

 Default value: `0`.

+## prefer_column_name_to_alias {#prefer-column-name-to-alias}
+
+Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when the alias is the same as the column name, see [Expression Aliases](../../sql-reference/syntax.md#notes-on-usage). Enable this setting to make the alias syntax rules in ClickHouse more compatible with most other database engines.
+
+Possible values:
+
+- 0 — The column name is substituted with the alias.
+- 1 — The column name is not substituted with the alias.
+
+Default value: `0`.
+
+**Example**
+
+The difference between enabled and disabled:
+
+Query:
+
+```sql
+SET prefer_column_name_to_alias = 0;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Result:
+
+```text
+Received exception from server (version 21.5.1):
+Code: 184. DB::Exception: Received from localhost:9000. DB::Exception: Aggregate function avg(number) is found inside another aggregate function in query: While processing avg(number) AS number.
+```
+
+Query:
+
+```sql
+SET prefer_column_name_to_alias = 1;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Result:
+
+```text
+┌─number─┬─max(number)─┐
+│    4.5 │           9 │
+└────────┴─────────────┘
+```
+
 ## limit {#limit}

 Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md#limit-clause) clause, so that the limit specified in the query cannot exceed the limit set by this setting.

@@ -171,7 +171,7 @@ Received exception from server (version 18.14.17):
 Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
 ```

-In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception.
+In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`.

 ## Asterisk {#asterisk}

@@ -2755,6 +2755,50 @@ SELECT * FROM test2;

 Значение по умолчанию: `0`.

+## prefer_column_name_to_alias {#prefer-column-name-to-alias}
+
+Включает или отключает замену названий столбцов на синонимы в выражениях и секциях запросов, см. [Примечания по использованию синонимов](../../sql-reference/syntax.md#syntax-expression_aliases). Включите эту настройку, чтобы синтаксис синонимов в ClickHouse был более совместим с большинством других СУБД.
+
+Возможные значения:
+
+- 0 — синоним подставляется вместо имени столбца.
+- 1 — синоним не подставляется вместо имени столбца.
+
+Значение по умолчанию: `0`.
+
+**Пример**
+
+Какие изменения привносит включение и выключение настройки:
+
+Запрос:
+
+```sql
+SET prefer_column_name_to_alias = 0;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Результат:
+
+```text
+Received exception from server (version 21.5.1):
+Code: 184. DB::Exception: Received from localhost:9000. DB::Exception: Aggregate function avg(number) is found inside another aggregate function in query: While processing avg(number) AS number.
+```
+
+Запрос:
+
+```sql
+SET prefer_column_name_to_alias = 1;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Результат:
+
+```text
+┌─number─┬─max(number)─┐
+│    4.5 │           9 │
+└────────┴─────────────┘
+```
+
 ## limit {#limit}

 Устанавливает максимальное количество строк, возвращаемых запросом. Ограничивает сверху значение, установленное в запросе в секции [LIMIT](../../sql-reference/statements/select/limit.md#limit-clause).

@@ -128,7 +128,7 @@ expr AS alias
 Например, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.

-В функции [CAST](sql_reference/syntax.md#type_conversion_function-cast), ключевое слово `AS` имеет другое значение. Смотрите описание функции.
+В функции [CAST](../sql_reference/syntax.md#type_conversion_function-cast), ключевое слово `AS` имеет другое значение. Смотрите описание функции.

 - `expr` — любое выражение, которое поддерживает ClickHouse.

@@ -138,7 +138,7 @@ expr AS alias
 Например, `SELECT "table t".column_name FROM table_name AS "table t"`.

-### Примечания по использованию {#primechaniia-po-ispolzovaniiu}
+### Примечания по использованию {#notes-on-usage}

 Синонимы являются глобальными для запроса или подзапроса, и вы можете определить синоним в любой части запроса для любого выражения. Например, `SELECT (1 AS n) + 2, n`.

@@ -169,9 +169,9 @@ Received exception from server (version 18.14.17):
 Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
 ```

-В этом примере мы объявили таблицу `t` со столбцом `b`. Затем, при выборе данных, мы определили синоним `sum(b) AS b`. Поскольку синонимы глобальные, то ClickHouse заменил литерал `b` в выражении `argMax(a, b)` выражением `sum(b)`. Эта замена вызвала исключение.
+В этом примере мы объявили таблицу `t` со столбцом `b`. Затем, при выборе данных, мы определили синоним `sum(b) AS b`. Поскольку синонимы глобальные, то ClickHouse заменил литерал `b` в выражении `argMax(a, b)` выражением `sum(b)`. Эта замена вызвала исключение. Можно изменить это поведение, включив настройку [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias), для этого нужно установить ее в значение `1`.

-## Звёздочка {#zviozdochka}
+## Звёздочка {#asterisk}

 В запросе `SELECT`, вместо выражения может стоять звёздочка. Подробнее смотрите раздел «SELECT».

@@ -180,4 +180,3 @@ Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception
 Выражение представляет собой функцию, идентификатор, литерал, применение оператора, выражение в скобках, подзапрос, звёздочку. А также может содержать синоним.

 Список выражений - одно выражение или несколько выражений через запятую.
 Функции и операторы, в свою очередь, в качестве аргументов, могут иметь произвольные выражения.

@@ -1,13 +1,30 @@
----
-machine_translated: true
-machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
----
-
-# system.functions {#system-functions}
+# 系统。功能 {#system-functions}

-包含有关正常函数和聚合函数的信息。
+包含有关常规函数和聚合函数的信息。

 列:

 - `name`(`String`) — The name of the function.
 - `is_aggregate`(`UInt8`) — Whether the function is aggregate.

+**举例**
+```
+SELECT * FROM system.functions LIMIT 10;
+```
+
+```
+┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
+│ sumburConsistentHash     │            0 │                0 │          │
+│ yandexConsistentHash     │            0 │                0 │          │
+│ demangle                 │            0 │                0 │          │
+│ addressToLine            │            0 │                0 │          │
+│ JSONExtractRaw           │            0 │                0 │          │
+│ JSONExtractKeysAndValues │            0 │                0 │          │
+│ JSONExtract              │            0 │                0 │          │
+│ JSONExtractString        │            0 │                0 │          │
+│ JSONExtractFloat         │            0 │                0 │          │
+│ JSONExtractInt           │            0 │                0 │          │
+└──────────────────────────┴──────────────┴──────────────────┴──────────┘
+
+10 rows in set. Elapsed: 0.002 sec.
+```

@@ -606,7 +606,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
 如果要获取数组中唯一项的列表,可以使用 arrayReduce('groupUniqArray', arr)。

-## arryjoin(arr) {#array-functions-join}
+## arrayJoin(arr) {#array-functions-join}

 一个特殊的功能。请参见[«ArrayJoin函数»](array-join.md#functions_arrayjoin)部分。

@@ -43,15 +43,15 @@ ClickHouse中提供的其他联接类型:

 Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.

-### ASOF加入使用 {#asof-join-usage}
+### ASOF JOIN使用 {#asof-join-usage}

 `ASOF JOIN` 当您需要连接没有完全匹配的记录时非常有用。

-算法需要表中的特殊列。 本专栏:
+该算法需要表中的特殊列。 该列需要满足:

 - 必须包含有序序列。
-- 可以是以下类型之一: [Int*, UInt*](../../../sql-reference/data-types/int-uint.md), [浮动\*](../../../sql-reference/data-types/float.md), [日期](../../../sql-reference/data-types/date.md), [日期时间](../../../sql-reference/data-types/datetime.md), [十进制\*](../../../sql-reference/data-types/decimal.md).
-- 不能是唯一的列 `JOIN`
+- 可以是以下类型之一: [Int*, UInt*](../../../sql-reference/data-types/int-uint.md), [Float\*](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal\*](../../../sql-reference/data-types/decimal.md).
+- 不能是`JOIN`子句中唯一的列

 语法 `ASOF JOIN ... ON`:

@@ -62,9 +62,9 @@ ASOF LEFT JOIN table_2
 ON equi_cond AND closest_match_cond
 ```

-您可以使用任意数量的相等条件和恰好一个最接近的匹配条件。 例如, `SELECT count() FROM table_1 ASOF LEFT JOIN table_2 ON table_1.a == table_2.b AND table_2.t <= table_1.t`.
+您可以使用任意数量的相等条件和一个且只有一个最接近的匹配条件。 例如, `SELECT count() FROM table_1 ASOF LEFT JOIN table_2 ON table_1.a == table_2.b AND table_2.t <= table_1.t`.

-支持最接近匹配的条件: `>`, `>=`, `<`, `<=`.
+支持最接近匹配的运算符: `>`, `>=`, `<`, `<=`.

 语法 `ASOF JOIN ... USING`:

@@ -75,9 +75,9 @@ ASOF JOIN table_2
 USING (equi_column1, ... equi_columnN, asof_column)
 ```

-`ASOF JOIN` 用途 `equi_columnX` 对于加入平等和 `asof_column` 用于加入与最接近的比赛 `table_1.asof_column >= table_2.asof_column` 条件。 该 `asof_column` 列总是在最后一个 `USING` 条款。
+在 `table_1.asof_column >= table_2.asof_column` 中, `ASOF JOIN` 使用 `equi_columnX` 来进行条件匹配, `asof_column` 用于JOIN最接近匹配。 `asof_column` 列总是在最后一个 `USING` 条件中。

-例如,请考虑下表:
+例如,参考下表:

      table_1                       table_2
    event     | ev_time | user_id   event     | ev_time | user_id
@@ -88,10 +88,10 @@ USING (equi_column1, ... equi_columnN, asof_column)
    event_1_2 | 13:00   | 42        event_2_3 | 13:00   | 42
    ...                             ...

-`ASOF JOIN` 可以从用户事件的时间戳 `table_1` 并找到一个事件 `table_2` 其中时间戳最接近事件的时间戳 `table_1` 对应于最接近的匹配条件。 如果可用,则相等的时间戳值是最接近的值。 在这里,该 `user_id` 列可用于连接相等和 `ev_time` 列可用于在最接近的匹配加入。 在我们的例子中, `event_1_1` 可以加入 `event_2_1`, `event_1_2` 可以加入 `event_2_3`, 但是 `event_2_2` 不能加入。
+`ASOF JOIN`会从 `table_2` 中的用户事件时间戳找出和 `table_1` 中用户事件时间戳中最近的一个时间戳,来满足最接近匹配的条件。如果有的话,则相等的时间戳值是最接近的值。在此例中,`user_id` 列可用于条件匹配,`ev_time` 列可用于最接近匹配。在此例中,`event_1_1` 可以 JOIN `event_2_1`,`event_1_2` 可以JOIN `event_2_3`,但是 `event_2_2` 不能被JOIN。

 !!! note "注"
-    `ASOF` 加入是 **不** 支持在 [加入我们](../../../engines/table-engines/special/join.md) 表引擎。
+    `ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。

 ## 分布式联接 {#global-join}

View File

@ -2252,30 +2252,27 @@ private:
return;
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initBlockOutputStream(block);
/// The header block containing zero rows was used to initialize
/// block_out_stream, do not output it.
/// Also do not output too much data if we're fuzzing.
if (block.rows() != 0
&& (query_fuzzer_runs == 0 || processed_rows < 100))
{
block_out_stream->write(block);
written_first_block = true;
}
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
bool clear_progress = false;
if (need_render_progress)
clear_progress = std_out.offset() > 0;
if (clear_progress)
clearProgress();
block_out_stream->write(block);
written_first_block = true;
/// Received data block is immediately displayed to the user.
block_out_stream->flush();
/// Restore progress bar after data block.
if (clear_progress)
if (need_render_progress)
writeProgress();
}
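The interleaved old/new lines above converge on one flow: bail out early, clear any drawn progress line, print the data block immediately, then redraw progress. A toy, self-contained illustration of that terminal pattern (a plain iostream demo, not the client's actual code; the ANSI `\r\033[K` clear sequence is an assumption about the terminal):

```cpp
#include <chrono>
#include <iostream>
#include <thread>

int main()
{
    bool progress_drawn = false;
    for (int block = 1; block <= 3; ++block)
    {
        if (progress_drawn)
            std::cout << "\r\033[K";          // erase the progress line before printing data

        std::cout << "data block " << block << '\n';   // received data is shown immediately

        std::cout << "progress: " << block * 33 << "%" << std::flush;   // redraw progress
        progress_drawn = true;
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
    std::cout << '\n';
}
```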

View File

@ -844,8 +844,8 @@ namespace
fmt::print("The pidof command returned unusual output.\n");
}
WriteBufferFromFileDescriptor stderr(STDERR_FILENO);
copyData(sh->err, stderr);
WriteBufferFromFileDescriptor std_err(STDERR_FILENO);
copyData(sh->err, std_err);
sh->tryWait();
}

View File

@ -133,6 +133,7 @@ enum class AccessType
M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\
M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \
M(SYSTEM_RESTART_DISK, "SYSTEM RESTART DISK", GLOBAL, SYSTEM) \
M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP_MERGES, START MERGES", TABLE, SYSTEM) \
M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \
M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \

View File

@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F GroupBitmap | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | grep -v -F GroupBitmap | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -55,7 +55,7 @@ protected:
virtual Poco::Logger * getLog() const = 0;
virtual const Poco::Timespan & getHTTPTimeout() const = 0;
virtual Poco::Timespan getHTTPTimeout() const = 0;
virtual Poco::URI createBaseURI() const = 0;

View File

@ -28,7 +28,7 @@ LibraryBridgeHelper::LibraryBridgeHelper(
, log(&Poco::Logger::get("LibraryBridgeHelper"))
, sample_block(sample_block_)
, config(context_->getConfigRef())
, http_timeout(context_->getSettingsRef().http_receive_timeout.value.totalSeconds())
, http_timeout(context_->getSettingsRef().http_receive_timeout.value)
, dictionary_id(dictionary_id_)
{
bridge_port = config.getUInt("library_bridge.port", DEFAULT_PORT);

View File

@ -57,7 +57,7 @@ protected:
Poco::Logger * getLog() const override { return log; }
const Poco::Timespan & getHTTPTimeout() const override { return http_timeout; }
Poco::Timespan getHTTPTimeout() const override { return http_timeout; }
Poco::URI createBaseURI() const override;

View File

@ -62,9 +62,9 @@ public:
static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
XDBCBridgeHelper(
ContextPtr global_context_,
const Poco::Timespan & http_timeout_,
const std::string & connection_string_)
ContextPtr global_context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
: IXDBCBridgeHelper(global_context_)
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
@ -90,7 +90,7 @@ protected:
String configPrefix() const override { return BridgeHelperMixin::configPrefix(); }
const Poco::Timespan & getHTTPTimeout() const override { return http_timeout; }
Poco::Timespan getHTTPTimeout() const override { return http_timeout; }
const Poco::Util::AbstractConfiguration & getConfig() const override { return config; }
@ -118,7 +118,7 @@ private:
Poco::Logger * log;
std::string connection_string;
const Poco::Timespan & http_timeout;
Poco::Timespan http_timeout;
std::string bridge_host;
size_t bridge_port;
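This hunk, like the SettingFieldTimespan and callback-signature hunks later in the commit, replaces `const Poco::Timespan &` with a by-value `Poco::Timespan`. Timespan is a small, trivially copyable value, and storing a reference member risks dangling when the helper outlives its constructor argument. A minimal sketch of the hazard, using a hypothetical `Timespan` stand-in rather than the real Poco type:

```cpp
#include <cstdint>

// Hypothetical stand-in for Poco::Timespan: a small, trivially copyable value.
struct Timespan { std::int64_t micros = 0; };

struct BadHelper
{
    const Timespan & timeout;                  // dangles if bound to a temporary
    explicit BadHelper(const Timespan & t) : timeout(t) {}
};

struct GoodHelper
{
    Timespan timeout;                          // cheap copy, no lifetime coupling
    explicit GoodHelper(Timespan t) : timeout(t) {}
};

int main()
{
    GoodHelper good(Timespan{5'000'000});      // fine: the value is stored
    // BadHelper bad(Timespan{5'000'000});     // the temporary dies at the end of the
    //                                         // statement, leaving a dangling reference
    return good.timeout.micros > 0 ? 0 : 1;
}
```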

View File

@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -680,8 +680,12 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
PipelineExecutorPtr executor;
auto on_cancel = [& executor]() { executor->cancel(); };
if (!elem->pipe)
elem->pipe = elem->creating_pipe_callback();
QueryPipeline pipeline;
pipeline.init(std::move(*elem->pipe));
elem->pipe.reset();
pipeline.resize(1);
auto sink = std::make_shared<ExternalTableDataSink>(pipeline.getHeader(), *this, *elem, std::move(on_cancel));
pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr

View File

@ -41,6 +41,7 @@ struct ExternalTableData
/// Pipe of data from table;
std::unique_ptr<Pipe> pipe;
std::string table_name;
std::function<std::unique_ptr<Pipe>()> creating_pipe_callback;
/// Flag if need to stop reading.
std::atomic_bool is_cancelled = false;
};

View File

@ -116,7 +116,7 @@ ConnectionEstablisherAsync::ConnectionEstablisherAsync(
epoll.add(receive_timeout.getDescriptor());
}
void ConnectionEstablisherAsync::Routine::ReadCallback::operator()(int fd, const Poco::Timespan & timeout, const std::string &)
void ConnectionEstablisherAsync::Routine::ReadCallback::operator()(int fd, Poco::Timespan timeout, const std::string &)
{
/// Check if it's the first time and we need to add socket fd to epoll.
if (connection_establisher_async.socket_fd == -1)

View File

@ -92,7 +92,7 @@ private:
ConnectionEstablisherAsync & connection_establisher_async;
Fiber & fiber;
void operator()(int fd, const Poco::Timespan & timeout, const std::string &);
void operator()(int fd, Poco::Timespan timeout, const std::string &);
};
Fiber operator()(Fiber && sink);

View File

@ -98,7 +98,7 @@ private:
PacketReceiver & receiver;
Fiber & sink;
void operator()(int, const Poco::Timespan & timeout, const std::string &)
void operator()(int, Poco::Timespan timeout, const std::string &)
{
receiver.receive_timeout.setRelative(timeout);
receiver.is_read_in_process = true;

View File

@ -9,7 +9,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -187,6 +187,7 @@ public:
* So LC(Nullable(T)) would return true, LC(U) -- false.
*/
bool nestedIsNullable() const { return isColumnNullable(*dictionary.getColumnUnique().getNestedColumn()); }
void nestedToNullable() { dictionary.getColumnUnique().nestedToNullable(); }
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); }

View File

@ -50,6 +50,7 @@ public:
const ColumnPtr & getNestedColumn() const override;
const ColumnPtr & getNestedNotNullableColumn() const override { return column_holder; }
bool nestedColumnIsNullable() const override { return is_nullable; }
void nestedToNullable() override;
size_t uniqueInsert(const Field & x) override;
size_t uniqueInsertFrom(const IColumn & src, size_t n) override;
@ -263,6 +264,13 @@ void ColumnUnique<ColumnType>::updateNullMask()
}
}
template <typename ColumnType>
void ColumnUnique<ColumnType>::nestedToNullable()
{
is_nullable = true;
createNullMask();
}
template <typename ColumnType>
const ColumnPtr & ColumnUnique<ColumnType>::getNestedColumn() const
{

View File

@ -24,6 +24,7 @@ public:
virtual const ColumnPtr & getNestedNotNullableColumn() const = 0;
virtual bool nestedColumnIsNullable() const = 0;
virtual void nestedToNullable() = 0;
/// Returns array with StringRefHash calculated for each row of getNestedNotNullableColumn() column.
/// Returns nullptr if nested column doesn't contain strings. Otherwise calculates hash (if it wasn't).

View File

@ -17,7 +17,7 @@ PEERDIR(
)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -232,7 +232,7 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
static std::string layerFromHost()
{
utsname buf;
struct utsname buf;
if (uname(&buf))
throw Poco::Exception(std::string("uname failed: ") + errnoToString(errno));

View File

@ -129,7 +129,12 @@ String Elf::getBuildID() const
return {};
}
#if defined(OS_SUNOS)
String Elf::getBuildID(const char * nhdr_pos, size_t size)
{
return {};
}
#else
String Elf::getBuildID(const char * nhdr_pos, size_t size)
{
const char * nhdr_end = nhdr_pos + size;
@ -149,6 +154,7 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size)
return {};
}
#endif // OS_SUNOS
String Elf::getBinaryHash() const

View File

@ -9,7 +9,7 @@
namespace DB
{
using AsyncCallback = std::function<void(int, const Poco::Timespan &, const std::string &)>;
using AsyncCallback = std::function<void(int, Poco::Timespan, const std::string &)>;
class Epoll
{

View File

@ -12,6 +12,7 @@
#include <Core/Defines.h>
#include <common/types.h>
#include <Common/Exception.h>
#include <Common/MemorySanitizer.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
@ -584,8 +585,19 @@ protected:
void destroyElements()
{
if (!std::is_trivially_destructible_v<Cell>)
{
for (iterator it = begin(), it_end = end(); it != it_end; ++it)
{
it.ptr->~Cell();
/// In case of poison_in_dtor=1 it will be poisoned,
/// but it may be used later, during iteration.
///
/// NOTE, that technically this is UB [1], but OK for now.
///
/// [1]: https://github.com/google/sanitizers/issues/854#issuecomment-329661378
__msan_unpoison(it.ptr, sizeof(*it.ptr));
}
}
}
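For context, a self-contained sketch of the destroy-then-unpoison trick used above. `__msan_unpoison` is the real MemorySanitizer interface call; the `UNPOISON` fallback macro and the toy `Cell` are illustrative assumptions:

```cpp
#include <cstddef>
#include <new>

#if defined(__has_feature)
#  if __has_feature(memory_sanitizer)
#    include <sanitizer/msan_interface.h>
#    define UNPOISON(ptr, size) __msan_unpoison(ptr, size)
#  endif
#endif
#ifndef UNPOISON
#  define UNPOISON(ptr, size) ((void)0)   // no-op when MSan is not enabled
#endif

struct Cell
{
    int payload = 0;
    ~Cell() { }   // non-trivial destructor, so destroyElements() must run it
};

int main()
{
    alignas(Cell) std::byte storage[sizeof(Cell) * 4];
    Cell * cells = reinterpret_cast<Cell *>(storage);
    for (std::size_t i = 0; i < 4; ++i)
        new (&cells[i]) Cell();

    for (std::size_t i = 0; i < 4; ++i)
    {
        cells[i].~Cell();
        // With -fsanitize-memory-use-after-dtor the destroyed cell is poisoned,
        // but code that still walks the raw cell array would trip MSan, hence
        // the unpoison (technically UB, as the hunk's comment admits).
        UNPOISON(&cells[i], sizeof(Cell));
    }
}
```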

View File

@ -11,7 +11,7 @@ struct OpenTelemetryTraceContext
// The incoming tracestate header and the trace flags, we just pass them
// downstream. See https://www.w3.org/TR/trace-context/
String tracestate;
__uint8_t trace_flags = 0;
uint8_t trace_flags = 0;
// Parse/compose OpenTelemetry traceparent header.
bool parseTraceparentHeader(const std::string & traceparent, std::string & error);

View File

@ -7,8 +7,12 @@ StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current()
::rusage rusage {};
#if !defined(__APPLE__)
#if defined(OS_SUNOS)
::getrusage(RUSAGE_LWP, &rusage);
#else
::getrusage(RUSAGE_THREAD, &rusage);
#endif
#endif // OS_SUNOS
#endif // __APPLE__
res.user_ns = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
res.sys_ns = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL;
return res;

View File

@ -1,5 +1,8 @@
#include <unistd.h>
#include <sys/ioctl.h>
#if defined(OS_SUNOS)
# include <sys/termios.h>
#endif
#include <Common/Exception.h>
#include <Common/TerminalSize.h>
#include <boost/program_options.hpp>
@ -14,7 +17,7 @@ uint16_t getTerminalWidth()
{
if (isatty(STDIN_FILENO))
{
winsize terminal_size {};
struct winsize terminal_size {};
if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size))
DB::throwFromErrno("Cannot obtain terminal window size (ioctl TIOCGWINSZ)", DB::ErrorCodes::SYSTEM_ERROR);

View File

@ -205,6 +205,13 @@ size_t ThreadPoolImpl<Thread>::active() const
return scheduled_jobs;
}
template <typename Thread>
bool ThreadPoolImpl<Thread>::finished() const
{
std::unique_lock lock(mutex);
return shutdown;
}
template <typename Thread>
void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
{

View File

@ -67,6 +67,10 @@ public:
/// Returns number of running and scheduled jobs.
size_t active() const;
/// Returns true if the pool has already terminated
/// (and any further scheduling will produce a CANNOT_SCHEDULE_TASK exception)
bool finished() const;
void setMaxThreads(size_t value);
void setMaxFreeThreads(size_t value);
void setQueueSize(size_t value);
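A minimal sketch of how the new `finished()` accessor composes with scheduling: the same mutex guards the shutdown flag and the queue, so a caller can probe the pool instead of catching the exception. This is a toy class under those assumptions, not ClickHouse's ThreadPoolImpl:

```cpp
#include <mutex>
#include <stdexcept>

class PoolSketch
{
public:
    bool finished() const
    {
        std::unique_lock lock(mutex);
        return shutdown;
    }

    void schedule()
    {
        std::unique_lock lock(mutex);
        if (shutdown)
            throw std::runtime_error("CANNOT_SCHEDULE_TASK: pool is finished");
        ++scheduled_jobs;   // a real pool would enqueue the job here
    }

    void finalize()
    {
        std::unique_lock lock(mutex);
        shutdown = true;
    }

private:
    mutable std::mutex mutex;
    bool shutdown = false;
    int scheduled_jobs = 0;
};

int main()
{
    PoolSketch pool;
    pool.schedule();
    pool.finalize();
    if (!pool.finished())   // probe instead of relying on the exception
        pool.schedule();
}
```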

View File

@ -105,8 +105,12 @@ struct RUsageCounters
{
::rusage rusage {};
#if !defined(__APPLE__)
#if defined(OS_SUNOS)
::getrusage(RUSAGE_LWP, &rusage);
#else
::getrusage(RUSAGE_THREAD, &rusage);
#endif
#endif // OS_SUNOS
#endif // __APPLE
return RUsageCounters(rusage, getClockMonotonic());
}

View File

@ -74,7 +74,7 @@ void TimerDescriptor::drain() const
}
}
void TimerDescriptor::setRelative(const Poco::Timespan & timespan) const
void TimerDescriptor::setRelative(Poco::Timespan timespan) const
{
itimerspec spec;
spec.it_interval.tv_nsec = 0;

View File

@ -24,7 +24,7 @@ public:
void reset() const;
void drain() const;
void setRelative(const Poco::Timespan & timespan) const;
void setRelative(Poco::Timespan timespan) const;
};
}

View File

@ -49,7 +49,7 @@ __attribute__((__weak__)) void checkStackSize()
stack_address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - max_stack_size);
#else
pthread_attr_t attr;
# if defined(__FreeBSD__)
# if defined(__FreeBSD__) || defined(OS_SUNOS)
pthread_attr_init(&attr);
if (0 != pthread_attr_get_np(pthread_self(), &attr))
throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR);

View File

@ -12,7 +12,7 @@
static void setAffinity()
{
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__sun)
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);

View File

@ -85,19 +85,19 @@ inline bool parseIPv6(const char * src, unsigned char * dst)
return clear_dst();
unsigned char tmp[IPV6_BINARY_LENGTH]{};
auto * tp = tmp;
auto * endp = tp + IPV6_BINARY_LENGTH;
const auto * curtok = src;
auto saw_xdigit = false;
unsigned char * tp = tmp;
unsigned char * endp = tp + IPV6_BINARY_LENGTH;
const char * curtok = src;
bool saw_xdigit = false;
UInt32 val{};
unsigned char * colonp = nullptr;
/// Assuming zero-terminated string.
while (const auto ch = *src++)
while (char ch = *src++)
{
const auto num = unhex(ch);
UInt8 num = unhex(ch);
if (num != u8'\xff')
if (num != 0xFF)
{
val <<= 4;
val |= num;

View File

@ -1,6 +1,6 @@
#include <pthread.h>
#if defined(__APPLE__)
#if defined(__APPLE__) || defined(OS_SUNOS)
#elif defined(__FreeBSD__)
#include <pthread_np.h>
#else
@ -34,6 +34,8 @@ void setThreadName(const char * name)
if ((false))
#elif defined(OS_DARWIN)
if (0 != pthread_setname_np(name))
#elif defined(OS_SUNOS)
if (0 != pthread_setname_np(pthread_self(), name))
#else
if (0 != prctl(PR_SET_NAME, name, 0, 0, 0))
#endif
@ -44,7 +46,7 @@ std::string getThreadName()
{
std::string name(16, '\0');
#if defined(__APPLE__)
#if defined(__APPLE__) || defined(OS_SUNOS)
if (pthread_getname_np(pthread_self(), name.data(), name.size()))
throw DB::Exception("Cannot get thread name with pthread_getname_np()", DB::ErrorCodes::PTHREAD_ERROR);
#elif defined(__FreeBSD__)

View File

@ -24,7 +24,7 @@ INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -15,7 +15,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -101,7 +101,7 @@ struct SettingFieldTimespan
Poco::Timespan value;
bool changed = false;
explicit SettingFieldTimespan(const Poco::Timespan & x = {}) : value(x) {}
explicit SettingFieldTimespan(Poco::Timespan x = {}) : value(x) {}
template <class Rep, class Period = std::ratio<1>>
explicit SettingFieldTimespan(const std::chrono::duration<Rep, Period> & x)
@ -110,7 +110,7 @@ struct SettingFieldTimespan
explicit SettingFieldTimespan(UInt64 x) : SettingFieldTimespan(Poco::Timespan{static_cast<Poco::Timespan::TimeDiff>(x * microseconds_per_unit)}) {}
explicit SettingFieldTimespan(const Field & f);
SettingFieldTimespan & operator =(const Poco::Timespan & x) { value = x; changed = true; return *this; }
SettingFieldTimespan & operator =(Poco::Timespan x) { value = x; changed = true; return *this; }
template <class Rep, class Period = std::ratio<1>>
SettingFieldTimespan & operator =(const std::chrono::duration<Rep, Period> & x) { *this = Poco::Timespan{static_cast<Poco::Timespan::TimeDiff>(std::chrono::duration_cast<std::chrono::microseconds>(x).count())}; return *this; }

View File

@ -10,7 +10,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -458,8 +458,6 @@ void RemoteQueryExecutor::sendScalars()
void RemoteQueryExecutor::sendExternalTables()
{
SelectQueryInfo query_info;
size_t count = connections->size();
{
@ -474,24 +472,29 @@ void RemoteQueryExecutor::sendExternalTables()
for (const auto & table : external_tables)
{
StoragePtr cur = table.second;
auto metadata_snapshot = cur->getInMemoryMetadataPtr();
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, query_info);
Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, query_info, context,
read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
auto data = std::make_unique<ExternalTableData>();
data->table_name = table.first;
data->creating_pipe_callback = [cur, context = this->context]()
{
SelectQueryInfo query_info;
auto metadata_snapshot = cur->getInMemoryMetadataPtr();
QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(
context, QueryProcessingStage::Complete, query_info);
if (pipe.empty())
data->pipe = std::make_unique<Pipe>(
Pipe pipe = cur->read(
metadata_snapshot->getColumns().getNamesOfPhysical(),
metadata_snapshot, query_info, context,
read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
if (pipe.empty())
return std::make_unique<Pipe>(
std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock(), Chunk()));
else
data->pipe = std::make_unique<Pipe>(std::move(pipe));
return std::make_unique<Pipe>(std::move(pipe));
};
data->pipe = data->creating_pipe_callback();
res.emplace_back(std::move(data));
}
external_tables_data.push_back(std::move(res));
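The rework above defers building the pipe behind `creating_pipe_callback`, so the (possibly expensive) read setup can be re-run if the pipe has to be created again. A self-contained sketch of that lazy-factory pattern, with a toy `Pipe` instead of the ClickHouse types:

```cpp
#include <functional>
#include <iostream>
#include <memory>

struct Pipe { int source_id = 0; };   // toy stand-in for DB::Pipe

struct ExternalTableDataSketch
{
    std::unique_ptr<Pipe> pipe;
    std::function<std::unique_ptr<Pipe>()> creating_pipe_callback;
};

int main()
{
    ExternalTableDataSketch data;
    int table = 42;
    // The callback captures whatever is needed to (re)build the pipe on demand.
    data.creating_pipe_callback = [table] { return std::make_unique<Pipe>(Pipe{table}); };
    data.pipe = data.creating_pipe_callback();   // eager first build

    data.pipe.reset();                           // consumed by one send...
    data.pipe = data.creating_pipe_callback();   // ...rebuilt for a retry
    std::cout << data.pipe->source_id << '\n';
}
```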

View File

@ -19,7 +19,7 @@ struct RemoteQueryExecutorRoutine
RemoteQueryExecutorReadContext & read_context;
Fiber & fiber;
void operator()(int fd, const Poco::Timespan & timeout = 0, const std::string fd_description = "")
void operator()(int fd, Poco::Timespan timeout = 0, const std::string fd_description = "")
{
try
{
@ -89,7 +89,7 @@ RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(IConnections & co
fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine));
}
void RemoteQueryExecutorReadContext::setConnectionFD(int fd, const Poco::Timespan & timeout, const std::string & fd_description)
void RemoteQueryExecutorReadContext::setConnectionFD(int fd, Poco::Timespan timeout, const std::string & fd_description)
{
if (fd == connection_fd)
return;

View File

@ -58,7 +58,7 @@ public:
bool checkTimeout(bool blocking = false);
bool checkTimeoutImpl(bool blocking);
void setConnectionFD(int fd, const Poco::Timespan & timeout = 0, const std::string & fd_description = "");
void setConnectionFD(int fd, Poco::Timespan timeout = 0, const std::string & fd_description = "");
void setTimer() const;
bool resumeRoutine();

View File

@ -12,7 +12,7 @@ NO_COMPILER_WARNINGS()
SRCS(
<? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -9,7 +9,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F 'PostgreSQL' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F examples | grep -v -F 'PostgreSQL' | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -30,14 +30,14 @@ FlatDictionary::FlatDictionary(
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
Configuration configuration_,
BlockPtr previously_loaded_block_)
BlockPtr update_field_loaded_block_)
: IDictionary(dict_id_)
, dict_struct(dict_struct_)
, source_ptr{std::move(source_ptr_)}
, dict_lifetime(dict_lifetime_)
, configuration(configuration_)
, loaded_keys(configuration.initial_array_size, false)
, previously_loaded_block(std::move(previously_loaded_block_))
, update_field_loaded_block(std::move(update_field_loaded_block_))
{
createAttributes();
loadData();
@ -273,7 +273,7 @@ void FlatDictionary::blockToAttributes(const Block & block)
void FlatDictionary::updateData()
{
if (!previously_loaded_block || previously_loaded_block->rows() == 0)
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
{
auto stream = source_ptr->loadUpdatedAll();
stream->readPrefix();
@ -281,13 +281,13 @@ void FlatDictionary::updateData()
while (const auto block = stream->read())
{
/// We are using this to keep saved data if input stream consists of multiple blocks
if (!previously_loaded_block)
previously_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
if (!update_field_loaded_block)
update_field_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
for (size_t column_index = 0; column_index < block.columns(); ++column_index)
{
const IColumn & update_column = *block.getByPosition(column_index).column.get();
MutableColumnPtr saved_column = previously_loaded_block->getByPosition(column_index).column->assumeMutable();
MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(column_index).column->assumeMutable();
saved_column->insertRangeFrom(update_column, 0, update_column.size());
}
}
@ -298,12 +298,12 @@ void FlatDictionary::updateData()
auto stream = source_ptr->loadUpdatedAll();
mergeBlockWithStream<DictionaryKeyType::simple>(
dict_struct.getKeysSize(),
*previously_loaded_block,
*update_field_loaded_block,
stream);
}
if (previously_loaded_block)
blockToAttributes(*previously_loaded_block.get());
if (update_field_loaded_block)
blockToAttributes(*update_field_loaded_block.get());
}
void FlatDictionary::loadData()
@ -347,6 +347,9 @@ void FlatDictionary::calculateBytesAllocated()
callOnDictionaryAttributeType(attribute.type, type_call);
}
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
}
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute, const Field & null_value)

View File

@ -39,7 +39,7 @@ public:
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
Configuration configuration_,
BlockPtr previously_loaded_block_ = nullptr);
BlockPtr update_field_loaded_block_ = nullptr);
std::string getTypeName() const override { return "Flat"; }
@ -55,7 +55,7 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<FlatDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, previously_loaded_block);
return std::make_shared<FlatDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, update_field_loaded_block);
}
const IDictionarySource * getSource() const override { return source_ptr.get(); }
@ -184,7 +184,7 @@ private:
size_t bucket_count = 0;
mutable std::atomic<size_t> query_count{0};
BlockPtr previously_loaded_block;
BlockPtr update_field_loaded_block;
};
}
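The updateData() flow above lazily creates `update_field_loaded_block` from the first block's shape and then appends every block read from the source. A self-contained sketch of that accumulate-across-batches pattern, with `std::vector<int>` standing in for columns and a stubbed stream:

```cpp
#include <memory>
#include <optional>
#include <vector>

using Batch = std::vector<int>;

// Stub standing in for source_ptr->loadUpdatedAll()->read(): yields 3 batches.
std::optional<Batch> readNextBatch()
{
    static int calls = 0;
    if (++calls > 3)
        return std::nullopt;
    return Batch{calls, calls * 10};
}

int main()
{
    std::unique_ptr<Batch> saved;   // plays the role of update_field_loaded_block
    while (auto batch = readNextBatch())
    {
        if (!saved)
            saved = std::make_unique<Batch>();   // like block.cloneEmpty()
        // insertRangeFrom analogue: append the whole incoming batch.
        saved->insert(saved->end(), batch->begin(), batch->end());
    }
    return saved && !saved->empty() ? 0 : 1;
}
```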

View File

@ -42,13 +42,13 @@ HashedDictionary<dictionary_key_type, sparse>::HashedDictionary(
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
bool require_nonempty_,
BlockPtr previously_loaded_block_)
BlockPtr update_field_loaded_block_)
: IDictionary(dict_id_)
, dict_struct(dict_struct_)
, source_ptr(std::move(source_ptr_))
, dict_lifetime(dict_lifetime_)
, require_nonempty(require_nonempty_)
, previously_loaded_block(std::move(previously_loaded_block_))
, update_field_loaded_block(std::move(update_field_loaded_block_))
{
createAttributes();
loadData();
@ -343,7 +343,7 @@ void HashedDictionary<dictionary_key_type, sparse>::createAttributes()
template <DictionaryKeyType dictionary_key_type, bool sparse>
void HashedDictionary<dictionary_key_type, sparse>::updateData()
{
if (!previously_loaded_block || previously_loaded_block->rows() == 0)
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
{
auto stream = source_ptr->loadUpdatedAll();
stream->readPrefix();
@ -351,13 +351,13 @@ void HashedDictionary<dictionary_key_type, sparse>::updateData()
while (const auto block = stream->read())
{
/// We are using this to keep saved data if input stream consists of multiple blocks
if (!previously_loaded_block)
previously_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
if (!update_field_loaded_block)
update_field_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
for (const auto attribute_idx : ext::range(0, attributes.size() + 1))
{
const IColumn & update_column = *block.getByPosition(attribute_idx).column.get();
MutableColumnPtr saved_column = previously_loaded_block->getByPosition(attribute_idx).column->assumeMutable();
MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_idx).column->assumeMutable();
saved_column->insertRangeFrom(update_column, 0, update_column.size());
}
}
@ -368,14 +368,14 @@ void HashedDictionary<dictionary_key_type, sparse>::updateData()
auto stream = source_ptr->loadUpdatedAll();
mergeBlockWithStream<dictionary_key_type>(
dict_struct.getKeysSize(),
*previously_loaded_block,
*update_field_loaded_block,
stream);
}
if (previously_loaded_block)
if (update_field_loaded_block)
{
resize(previously_loaded_block->rows());
blockToAttributes(*previously_loaded_block.get());
resize(update_field_loaded_block->rows());
blockToAttributes(*update_field_loaded_block.get());
}
}
@ -586,6 +586,9 @@ void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
}
bytes_allocated += complex_key_arena.size();
if (update_field_loaded_block)
bytes_allocated += update_field_loaded_block->allocatedBytes();
}
template <DictionaryKeyType dictionary_key_type, bool sparse>

View File

@ -41,7 +41,7 @@ public:
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
bool require_nonempty_,
BlockPtr previously_loaded_block_ = nullptr);
BlockPtr update_field_loaded_block_ = nullptr);
std::string getTypeName() const override
{
@ -67,7 +67,7 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<HashedDictionary<dictionary_key_type, sparse>>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, previously_loaded_block);
return std::make_shared<HashedDictionary<dictionary_key_type, sparse>>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block);
}
const IDictionarySource * getSource() const override { return source_ptr.get(); }
@ -220,7 +220,7 @@ private:
size_t bucket_count = 0;
mutable std::atomic<size_t> query_count{0};
BlockPtr previously_loaded_block;
BlockPtr update_field_loaded_block;
Arena complex_key_arena;
};

View File

@ -19,7 +19,7 @@ NO_COMPILER_WARNINGS()
SRCS(
<? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -1,20 +1,22 @@
#include "DiskCacheWrapper.h"
#include <IO/copyData.h>
#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/WriteBufferFromFileDecorator.h>
#include <Common/quoteString.h>
#include <condition_variable>
namespace DB
{
/**
* Write buffer with possibility to set and invoke callback when buffer is finalized.
* Write buffer with possibility to set and invoke callback after 'finalize' call.
*/
class CompletionAwareWriteBuffer : public WriteBufferFromFileBase
class CompletionAwareWriteBuffer : public WriteBufferFromFileDecorator
{
public:
CompletionAwareWriteBuffer(std::unique_ptr<WriteBufferFromFileBase> impl_, std::function<void()> completion_callback_, size_t buf_size_)
: WriteBufferFromFileBase(buf_size_, nullptr, 0), impl(std::move(impl_)), completion_callback(completion_callback_) { }
CompletionAwareWriteBuffer(std::unique_ptr<WriteBufferFromFileBase> impl_, std::function<void()> completion_callback_)
: WriteBufferFromFileDecorator(std::move(impl_)), completion_callback(completion_callback_) { }
~CompletionAwareWriteBuffer() override
virtual ~CompletionAwareWriteBuffer() override
{
try
{
@ -31,31 +33,13 @@ public:
if (finalized)
return;
next();
impl->finalize();
finalized = true;
WriteBufferFromFileDecorator::finalize();
completion_callback();
}
void sync() override { impl->sync(); }
std::string getFileName() const override { return impl->getFileName(); }
private:
void nextImpl() override
{
impl->swap(*this);
impl->next();
impl->swap(*this);
}
/// Actual write buffer.
std::unique_ptr<WriteBufferFromFileBase> impl;
/// Callback is invoked when finalize is completed.
const std::function<void()> completion_callback;
bool finalized = false;
};
enum FileDownloadStatus
@ -200,8 +184,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode);
copyData(*src_buffer, *dst_buffer);
dst_buffer->finalize();
},
buf_size);
});
}
void DiskCacheWrapper::clearDirectory(const String & path)
@ -316,22 +299,6 @@ void DiskCacheWrapper::createDirectories(const String & path)
DiskDecorator::createDirectories(path);
}
/// TODO: Current reservation mechanism leaks IDisk abstraction details.
/// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object.
class ReservationDelegate : public IReservation
{
public:
ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { }
UInt64 getSize() const override { return delegate->getSize(); }
DiskPtr getDisk(size_t) const override { return wrapper; }
Disks getDisks() const override { return {wrapper}; }
void update(UInt64 new_size) override { delegate->update(new_size); }
private:
ReservationPtr delegate;
DiskPtr wrapper;
};
ReservationPtr DiskCacheWrapper::reserve(UInt64 bytes)
{
auto ptr = DiskDecorator::reserve(bytes);

View File

@ -196,4 +196,19 @@ void DiskDecorator::onFreeze(const String & path)
delegate->onFreeze(path);
}
void DiskDecorator::shutdown()
{
delegate->shutdown();
}
void DiskDecorator::startup()
{
delegate->startup();
}
void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextConstPtr context)
{
delegate->applyNewSettings(config, context);
}
}

View File

@ -65,12 +65,33 @@ public:
String getUniqueId(const String & path) const override { return delegate->getUniqueId(path); }
bool checkUniqueId(const String & id) const override { return delegate->checkUniqueId(id); }
DiskType::Type getType() const override { return delegate->getType(); }
Executor & getExecutor() override;
void onFreeze(const String & path) override;
SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
void shutdown() override;
void startup() override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextConstPtr context) override;
protected:
Executor & getExecutor() override;
DiskPtr delegate;
};
/// TODO: Current reservation mechanism leaks IDisk abstraction details.
/// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object.
class ReservationDelegate : public IReservation
{
public:
ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { }
UInt64 getSize() const override { return delegate->getSize(); }
DiskPtr getDisk(size_t) const override { return wrapper; }
Disks getDisks() const override { return {wrapper}; }
void update(UInt64 new_size) override { delegate->update(new_size); }
private:
ReservationPtr delegate;
DiskPtr wrapper;
};
}

View File

@ -0,0 +1,311 @@
#include "DiskRestartProxy.h"
#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/WriteBufferFromFileDecorator.h>
namespace DB
{
namespace ErrorCodes
{
extern const int DEADLOCK_AVOIDED;
}
using Millis = std::chrono::milliseconds;
using Seconds = std::chrono::seconds;
/// Holds restart read lock till buffer destruction.
class RestartAwareReadBuffer : public ReadBufferFromFileDecorator
{
public:
RestartAwareReadBuffer(const DiskRestartProxy & disk, std::unique_ptr<ReadBufferFromFileBase> impl_)
: ReadBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { }
private:
ReadLock lock;
};
/// Holds restart read lock till the buffer is finalized.
class RestartAwareWriteBuffer : public WriteBufferFromFileDecorator
{
public:
RestartAwareWriteBuffer(const DiskRestartProxy & disk, std::unique_ptr<WriteBuffer> impl_)
: WriteBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { }
virtual ~RestartAwareWriteBuffer() override
{
try
{
RestartAwareWriteBuffer::finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void finalize() override
{
if (finalized)
return;
WriteBufferFromFileDecorator::finalize();
lock.unlock();
}
private:
ReadLock lock;
};
DiskRestartProxy::DiskRestartProxy(DiskPtr & delegate_)
: DiskDecorator(delegate_) { }
ReservationPtr DiskRestartProxy::reserve(UInt64 bytes)
{
ReadLock lock (mutex);
auto ptr = DiskDecorator::reserve(bytes);
if (ptr)
{
auto disk_ptr = std::static_pointer_cast<DiskRestartProxy>(shared_from_this());
return std::make_unique<ReservationDelegate>(std::move(ptr), disk_ptr);
}
return ptr;
}
const String & DiskRestartProxy::getPath() const
{
ReadLock lock (mutex);
return DiskDecorator::getPath();
}
UInt64 DiskRestartProxy::getTotalSpace() const
{
ReadLock lock (mutex);
return DiskDecorator::getTotalSpace();
}
UInt64 DiskRestartProxy::getAvailableSpace() const
{
ReadLock lock (mutex);
return DiskDecorator::getAvailableSpace();
}
UInt64 DiskRestartProxy::getUnreservedSpace() const
{
ReadLock lock (mutex);
return DiskDecorator::getUnreservedSpace();
}
UInt64 DiskRestartProxy::getKeepingFreeSpace() const
{
ReadLock lock (mutex);
return DiskDecorator::getKeepingFreeSpace();
}
bool DiskRestartProxy::exists(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::exists(path);
}
bool DiskRestartProxy::isFile(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::isFile(path);
}
bool DiskRestartProxy::isDirectory(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::isDirectory(path);
}
size_t DiskRestartProxy::getFileSize(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::getFileSize(path);
}
void DiskRestartProxy::createDirectory(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::createDirectory(path);
}
void DiskRestartProxy::createDirectories(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::createDirectories(path);
}
void DiskRestartProxy::clearDirectory(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::clearDirectory(path);
}
void DiskRestartProxy::moveDirectory(const String & from_path, const String & to_path)
{
ReadLock lock (mutex);
DiskDecorator::moveDirectory(from_path, to_path);
}
DiskDirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path)
{
ReadLock lock (mutex);
return DiskDecorator::iterateDirectory(path);
}
void DiskRestartProxy::createFile(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::createFile(path);
}
void DiskRestartProxy::moveFile(const String & from_path, const String & to_path)
{
ReadLock lock (mutex);
DiskDecorator::moveFile(from_path, to_path);
}
void DiskRestartProxy::replaceFile(const String & from_path, const String & to_path)
{
ReadLock lock (mutex);
DiskDecorator::replaceFile(from_path, to_path);
}
void DiskRestartProxy::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
ReadLock lock (mutex);
DiskDecorator::copy(from_path, to_disk, to_path);
}
void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file_names)
{
ReadLock lock (mutex);
DiskDecorator::listFiles(path, file_names);
}
std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache)
const
{
ReadLock lock (mutex);
auto impl = DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache);
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
}
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
ReadLock lock (mutex);
auto impl = DiskDecorator::writeFile(path, buf_size, mode);
return std::make_unique<RestartAwareWriteBuffer>(*this, std::move(impl));
}
void DiskRestartProxy::removeFile(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::removeFile(path);
}
void DiskRestartProxy::removeFileIfExists(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::removeFileIfExists(path);
}
void DiskRestartProxy::removeDirectory(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::removeDirectory(path);
}
void DiskRestartProxy::removeRecursive(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::removeRecursive(path);
}
void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3)
{
ReadLock lock (mutex);
DiskDecorator::removeSharedFile(path, keep_s3);
}
void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_s3)
{
ReadLock lock (mutex);
DiskDecorator::removeSharedRecursive(path, keep_s3);
}
void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestamp & timestamp)
{
ReadLock lock (mutex);
DiskDecorator::setLastModified(path, timestamp);
}
Poco::Timestamp DiskRestartProxy::getLastModified(const String & path)
{
ReadLock lock (mutex);
return DiskDecorator::getLastModified(path);
}
void DiskRestartProxy::setReadOnly(const String & path)
{
ReadLock lock (mutex);
DiskDecorator::setReadOnly(path);
}
void DiskRestartProxy::createHardLink(const String & src_path, const String & dst_path)
{
ReadLock lock (mutex);
DiskDecorator::createHardLink(src_path, dst_path);
}
void DiskRestartProxy::truncateFile(const String & path, size_t size)
{
ReadLock lock (mutex);
DiskDecorator::truncateFile(path, size);
}
String DiskRestartProxy::getUniqueId(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::getUniqueId(path);
}
bool DiskRestartProxy::checkUniqueId(const String & id) const
{
ReadLock lock (mutex);
return DiskDecorator::checkUniqueId(id);
}
void DiskRestartProxy::restart()
{
/// Speed up processing unhealthy requests.
DiskDecorator::shutdown();
WriteLock lock (mutex, std::defer_lock);
LOG_INFO(log, "Acquiring lock to restart disk {}", DiskDecorator::getName());
auto start_time = std::chrono::steady_clock::now();
auto lock_timeout = Seconds(120);
do
{
/// Use a small timeout to not block read operations for a long time.
if (lock.try_lock_for(Millis(10)))
break;
} while (std::chrono::steady_clock::now() - start_time < lock_timeout);
if (!lock.owns_lock())
throw Exception("Failed to acquire restart lock within timeout. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED);
LOG_INFO(log, "Restart lock acquired. Restarting disk {}", DiskDecorator::getName());
DiskDecorator::startup();
LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName());
}
}
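The restart() method above is essentially this standard-library pattern: readers hold `std::shared_lock`, and the restarter polls `try_lock_for` on a `std::shared_timed_mutex` until a deadline. A self-contained sketch under those assumptions (timeouts shortened, toy function bodies):

```cpp
#include <chrono>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>

std::shared_timed_mutex mutex;

void readOperation()
{
    std::shared_lock lock(mutex);   // many concurrent readers allowed
    // ... perform a disk operation ...
}

void restart()
{
    std::unique_lock lock(mutex, std::defer_lock);
    auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(2);
    do
    {
        // Short attempts keep readers from being blocked for long stretches.
        if (lock.try_lock_for(std::chrono::milliseconds(10)))
            break;
    } while (std::chrono::steady_clock::now() < deadline);

    if (!lock.owns_lock())
        throw std::runtime_error("Failed to acquire restart lock within timeout");
    // ... exclusive section: swap settings, re-create clients, etc. ...
}

int main()
{
    readOperation();
    restart();
}
```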

View File

@ -0,0 +1,80 @@
#pragma once
#include "DiskDecorator.h"
#include <common/logger_useful.h>
#include <shared_mutex>
namespace DB
{
using ReadLock = std::shared_lock<std::shared_timed_mutex>;
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
class RestartAwareReadBuffer;
class RestartAwareWriteBuffer;
/**
 * Makes it possible to change underlying disk settings at runtime by calling the 'restart' method.
 * All disk methods are protected by a read lock. Read/write buffers produced by the disk hold the read lock until the buffer is finalized/destructed.
 * When the 'restart' method is called, a write lock is acquired to make sure that no operations are running on the disk.
*/
class DiskRestartProxy : public DiskDecorator
{
public:
explicit DiskRestartProxy(DiskPtr & delegate_);
ReservationPtr reserve(UInt64 bytes) override;
const String & getPath() const override;
UInt64 getTotalSpace() const override;
UInt64 getAvailableSpace() const override;
UInt64 getUnreservedSpace() const override;
UInt64 getKeepingFreeSpace() const override;
bool exists(const String & path) const override;
bool isFile(const String & path) const override;
bool isDirectory(const String & path) const override;
size_t getFileSize(const String & path) const override;
void createDirectory(const String & path) override;
void createDirectories(const String & path) override;
void clearDirectory(const String & path) override;
void moveDirectory(const String & from_path, const String & to_path) override;
DiskDirectoryIteratorPtr iterateDirectory(const String & path) override;
void createFile(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
size_t buf_size,
size_t estimated_size,
size_t aio_threshold,
size_t mmap_threshold,
MMappedFileCache * mmap_cache) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override;
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedRecursive(const String & path, bool keep_s3) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
void setReadOnly(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void truncateFile(const String & path, size_t size) override;
String getUniqueId(const String & path) const override;
bool checkUniqueId(const String & id) const override;
void restart();
private:
friend class RestartAwareReadBuffer;
friend class RestartAwareWriteBuffer;
/// Mutex to protect RW access.
mutable std::shared_timed_mutex mutex;
Poco::Logger * log = &Poco::Logger::get("DiskRestartProxy");
};
}

View File

@ -55,11 +55,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig(
std::shared_ptr<DiskSelector> result = std::make_shared<DiskSelector>(*this);
constexpr auto default_disk_name = "default";
std::set<String> old_disks_minus_new_disks;
for (const auto & [disk_name, _] : result->getDisksMap())
{
old_disks_minus_new_disks.insert(disk_name);
}
DisksMap old_disks_minus_new_disks (result->getDisksMap());
for (const auto & disk_name : keys)
{
@ -73,10 +69,11 @@ DiskSelectorPtr DiskSelector::updateFromConfig(
}
else
{
old_disks_minus_new_disks.erase(disk_name);
auto disk = old_disks_minus_new_disks[disk_name];
/// TODO: Ideally ClickHouse should complain if the disk has changed, but
/// implementing that is not a trivial task.
disk->applyNewSettings(config, context);
old_disks_minus_new_disks.erase(disk_name);
}
}
@ -91,7 +88,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig(
writeString("Disks ", warning);
int index = 0;
for (const String & name : old_disks_minus_new_disks)
for (const auto & [name, _] : old_disks_minus_new_disks)
{
if (index++ > 0)
writeString(", ", warning);

View File

@ -1,5 +1,6 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Core/Defines.h>
#include <common/types.h>
#include <Common/CurrentMetrics.h>
@ -14,6 +15,7 @@
#include <Poco/Timestamp.h>
#include <filesystem>
#include <Poco/Path.h>
#include "Poco/Util/AbstractConfiguration.h"
namespace fs = std::filesystem;
@ -213,24 +215,33 @@ public:
/// Invoked when Global Context is shutdown.
virtual void shutdown() { }
/// Performs action on disk startup.
virtual void startup() { }
/// Returns some unique string for a file; overridden for S3
/// Required to distinguish different copies of the same part on S3
virtual String getUniqueId(const String & path) const { return path; }
/// Check that the file exists and ClickHouse has access to it
/// Overridden in DiskS3
/// Required for S3 to ensure that replica has access to data wroten by other node
/// Required for S3 to ensure that replica has access to data written by other node
virtual bool checkUniqueId(const String & id) const { return exists(id); }
/// Returns executor to perform asynchronous operations.
virtual Executor & getExecutor() { return *executor; }
/// Invoked on partitions freeze query.
virtual void onFreeze(const String &) { }
/// Returns a guard that ensures synchronization of directory metadata with the storage device.
virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const;
/// Applies new settings for disk in runtime.
virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextConstPtr) { }
protected:
friend class DiskDecorator;
/// Returns executor to perform asynchronous operations.
virtual Executor & getExecutor() { return *executor; }
private:
std::unique_ptr<Executor> executor;
};

View File

@ -7,11 +7,13 @@
#include <optional>
#include <utility>
#include <IO/ReadBufferFromString.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekAvoidingReadBuffer.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDecorator.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/WriteHelpers.h>
#include <Poco/File.h>
@ -235,6 +237,28 @@ struct DiskS3::Metadata
}
};
DiskS3::Metadata DiskS3::readOrCreateMetaForWriting(const String & path, WriteMode mode)
{
bool exist = exists(path);
if (exist)
{
auto metadata = readMeta(path);
if (metadata.read_only)
throw Exception("File is read-only: " + path, ErrorCodes::PATH_ACCESS_DENIED);
if (mode == WriteMode::Rewrite)
removeFile(path); /// Remove for re-write.
else
return metadata;
}
auto metadata = createMeta(path);
/// Save empty metadata to disk to have ability to get file size while buffer is not finalized.
metadata.save();
return metadata;
}
DiskS3::Metadata DiskS3::readMeta(const String & path) const
{
return Metadata(s3_root_path, metadata_path, path);
@ -250,7 +274,11 @@ class ReadIndirectBufferFromS3 final : public ReadBufferFromFileBase
{
public:
ReadIndirectBufferFromS3(
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, DiskS3::Metadata metadata_, UInt64 s3_max_single_read_retries_, size_t buf_size_)
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
const String & bucket_,
DiskS3::Metadata metadata_,
size_t s3_max_single_read_retries_,
size_t buf_size_)
: client_ptr(std::move(client_ptr_))
, bucket(bucket_)
, metadata(std::move(metadata_))
@ -351,7 +379,7 @@ private:
std::shared_ptr<Aws::S3::S3Client> client_ptr;
const String & bucket;
DiskS3::Metadata metadata;
UInt64 s3_max_single_read_retries;
size_t s3_max_single_read_retries;
size_t buf_size;
size_t absolute_position = 0;
@ -360,30 +388,22 @@ private:
};
/// Stores data in S3 and adds the object key (S3 path) and object size to metadata file on local FS.
class WriteIndirectBufferFromS3 final : public WriteBufferFromFileBase
class WriteIndirectBufferFromS3 final : public WriteBufferFromFileDecorator
{
public:
WriteIndirectBufferFromS3(
std::shared_ptr<Aws::S3::S3Client> & client_ptr_,
const String & bucket_,
std::unique_ptr<WriteBufferFromS3> impl_,
DiskS3::Metadata metadata_,
const String & s3_path_,
std::optional<DiskS3::ObjectMetadata> object_metadata_,
size_t min_upload_part_size,
size_t max_single_part_upload_size,
size_t buf_size_)
: WriteBufferFromFileBase(buf_size_, nullptr, 0)
, impl(WriteBufferFromS3(client_ptr_, bucket_, metadata_.s3_root_path + s3_path_, min_upload_part_size, max_single_part_upload_size,std::move(object_metadata_), buf_size_))
String & s3_path_)
: WriteBufferFromFileDecorator(std::move(impl_))
, metadata(std::move(metadata_))
, s3_path(s3_path_)
{
}
, s3_path(s3_path_) { }
~WriteIndirectBufferFromS3() override
virtual ~WriteIndirectBufferFromS3() override
{
try
{
finalize();
WriteIndirectBufferFromS3::finalize();
}
catch (...)
{
@ -396,13 +416,10 @@ public:
if (finalized)
return;
next();
impl.finalize();
WriteBufferFromFileDecorator::finalize();
metadata.addObject(s3_path, count());
metadata.save();
finalized = true;
}
void sync() override
@ -414,20 +431,6 @@ public:
std::string getFileName() const override { return metadata.metadata_file_path; }
private:
void nextImpl() override
{
/// Transfer current working buffer to WriteBufferFromS3.
impl.swap(*this);
/// Write actual data to S3.
impl.next();
/// Return back working buffer.
impl.swap(*this);
}
WriteBufferFromS3 impl;
bool finalized = false;
DiskS3::Metadata metadata;
String s3_path;
};
@ -530,7 +533,6 @@ public:
std::future<void> execute(std::function<void()> task) override
{
auto promise = std::make_shared<std::promise<void>>();
pool.scheduleOrThrowOnError(
[promise, task]()
{
@ -553,6 +555,10 @@ public:
return promise->get_future();
}
void setMaxThreads(size_t threads)
{
pool.setMaxThreads(threads);
}
private:
ThreadPool pool;
};
@ -560,32 +566,18 @@ private:
DiskS3::DiskS3(
String name_,
std::shared_ptr<Aws::S3::S3Client> client_,
std::shared_ptr<S3::ProxyConfiguration> proxy_configuration_,
String bucket_,
String s3_root_path_,
String metadata_path_,
UInt64 s3_max_single_read_retries_,
size_t min_upload_part_size_,
size_t max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
int thread_pool_size_,
int list_object_keys_size_)
: IDisk(std::make_unique<AsyncExecutor>(thread_pool_size_))
SettingsPtr settings_,
GetDiskSettings settings_getter_)
: IDisk(std::make_unique<AsyncExecutor>(settings_->thread_pool_size))
, name(std::move(name_))
, client(std::move(client_))
, proxy_configuration(std::move(proxy_configuration_))
, bucket(std::move(bucket_))
, s3_root_path(std::move(s3_root_path_))
, metadata_path(std::move(metadata_path_))
, s3_max_single_read_retries(s3_max_single_read_retries_)
, min_upload_part_size(min_upload_part_size_)
, max_single_part_upload_size(max_single_part_upload_size_)
, min_bytes_for_seek(min_bytes_for_seek_)
, send_metadata(send_metadata_)
, revision_counter(0)
, list_object_keys_size(list_object_keys_size_)
, current_settings(std::move(settings_))
, settings_getter(settings_getter_)
{
}
@ -649,6 +641,13 @@ void DiskS3::clearDirectory(const String & path)
}
void DiskS3::moveFile(const String & from_path, const String & to_path)
{
auto settings = current_settings.get();
moveFile(from_path, to_path, settings->send_metadata);
}
void DiskS3::moveFile(const String & from_path, const String & to_path, bool send_metadata)
{
if (exists(to_path))
throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS);
@ -681,26 +680,26 @@ void DiskS3::replaceFile(const String & from_path, const String & to_path)
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const
{
auto settings = current_settings.get();
auto metadata = readMeta(path);
LOG_DEBUG(log, "Read from file by path: {}. Existing S3 objects: {}",
backQuote(metadata_path + path), metadata.s3_objects.size());
auto reader = std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, s3_max_single_read_retries, buf_size);
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), min_bytes_for_seek);
auto reader = std::make_unique<ReadIndirectBufferFromS3>(settings->client, bucket, metadata, settings->s3_max_single_read_retries, buf_size);
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), settings->min_bytes_for_seek);
}
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
bool exist = exists(path);
if (exist && readMeta(path).read_only)
throw Exception("File is read-only: " + path, ErrorCodes::PATH_ACCESS_DENIED);
auto settings = current_settings.get();
auto metadata = readOrCreateMetaForWriting(path, mode);
/// Path to store new S3 object.
auto s3_path = getRandomName();
std::optional<ObjectMetadata> object_metadata;
if (send_metadata)
if (settings->send_metadata)
{
auto revision = ++revision_counter;
object_metadata = {
@ -709,31 +708,19 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
s3_path = "r" + revisionToString(revision) + "-file-" + s3_path;
}
if (!exist || mode == WriteMode::Rewrite)
{
/// If metadata file exists - remove and create new.
if (exist)
removeFile(path);
LOG_DEBUG(log, "{} to file by path: {}. S3 path: {}",
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_path + path), s3_root_path + s3_path);
auto metadata = createMeta(path);
/// Save empty metadata to disk to have ability to get file size while buffer is not finalized.
metadata.save();
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
settings->client,
bucket,
metadata.s3_root_path + s3_path,
settings->s3_min_upload_part_size,
settings->s3_max_single_part_upload_size,
std::move(object_metadata),
buf_size);
LOG_DEBUG(log, "Write to file by path: {}. New S3 path: {}", backQuote(metadata_path + path), s3_root_path + s3_path);
return std::make_unique<WriteIndirectBufferFromS3>(
client, bucket, metadata, s3_path, object_metadata, min_upload_part_size, max_single_part_upload_size, buf_size);
}
else
{
auto metadata = readMeta(path);
LOG_DEBUG(log, "Append to file by path: {}. New S3 path: {}. Existing S3 objects: {}.",
backQuote(metadata_path + path), s3_root_path + s3_path, metadata.s3_objects.size());
return std::make_unique<WriteIndirectBufferFromS3>(
client, bucket, metadata, s3_path, object_metadata, min_upload_part_size, max_single_part_upload_size, buf_size);
}
return std::make_unique<WriteIndirectBufferFromS3>(std::move(s3_buffer), std::move(metadata), s3_path);
}
void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
@ -803,6 +790,8 @@ void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
{
if (!keys.empty())
{
auto settings = current_settings.get();
for (const auto & chunk : keys)
{
Aws::S3::Model::Delete delkeys;
@ -812,7 +801,7 @@ void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
auto outcome = client->DeleteObjects(request);
auto outcome = settings->client->DeleteObjects(request);
throwIfError(outcome);
}
}
@ -889,6 +878,13 @@ Poco::Timestamp DiskS3::getLastModified(const String & path)
}
void DiskS3::createHardLink(const String & src_path, const String & dst_path)
{
auto settings = current_settings.get();
createHardLink(src_path, dst_path, settings->send_metadata);
}
void DiskS3::createHardLink(const String & src_path, const String & dst_path, bool send_metadata)
{
/// We don't need to record hardlinks created to shadow folder.
if (send_metadata && !dst_path.starts_with("shadow/"))
@ -928,28 +924,41 @@ void DiskS3::setReadOnly(const String & path)
void DiskS3::shutdown()
{
auto settings = current_settings.get();
/// This call stops any further retry attempts for ongoing S3 requests.
/// If an S3 request has failed and the method below is executed, the S3 client immediately returns the last failed S3 request outcome.
/// If S3 is healthy, nothing bad happens and S3 requests are processed in the regular way without errors.
/// This should significantly speed up shutdown process if S3 is unhealthy.
client->DisableRequestProcessing();
settings->client->DisableRequestProcessing();
}
void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata)
{
auto settings = current_settings.get();
const String key = "operations/r" + revisionToString(revision) + "-" + operation_name;
WriteBufferFromS3 buffer(client, bucket, s3_root_path + key, min_upload_part_size, max_single_part_upload_size, metadata);
WriteBufferFromS3 buffer(
settings->client,
bucket,
s3_root_path + key,
settings->s3_min_upload_part_size,
settings->s3_max_single_part_upload_size,
metadata);
buffer.write('0');
buffer.finalize();
}
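The operation log keys have the form operations/r{revision}-{operation_name}, and the header below documents revisionToString as producing a 64-bit binary string with leading zeroes, so keys sort lexicographically in revision order. One way to produce such a string (a sketch consistent with that comment, not necessarily the exact implementation):

#include <bitset>
#include <cstdint>
#include <string>

/// 64-bit binary representation with leading zeroes, e.g. 5 -> "00...0101".
/// The fixed width makes lexicographic key order match numeric revision order.
std::string revisionToString(uint64_t revision)
{
    return std::bitset<64>(revision).to_string();
}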
void DiskS3::startup()
{
if (!send_metadata)
auto settings = current_settings.get();
if (!settings->send_metadata)
return;
LOG_INFO(log, "Starting up disk {}", name);
restore();
if (readSchemaVersion(bucket, s3_root_path) < RESTORABLE_SCHEMA_VERSION)
migrateToRestorableSchema();
@ -986,7 +995,13 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
if (!checkObjectExists(source_bucket, source_path + SCHEMA_VERSION_OBJECT))
return version;
ReadBufferFromS3 buffer(client, source_bucket, source_path + SCHEMA_VERSION_OBJECT, s3_max_single_read_retries);
auto settings = current_settings.get();
ReadBufferFromS3 buffer(
settings->client,
source_bucket,
source_path + SCHEMA_VERSION_OBJECT,
settings->s3_max_single_read_retries);
readIntText(version, buffer);
return version;
@ -994,13 +1009,22 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
void DiskS3::saveSchemaVersion(const int & version)
{
WriteBufferFromS3 buffer (client, bucket, s3_root_path + SCHEMA_VERSION_OBJECT, min_upload_part_size, max_single_part_upload_size);
auto settings = current_settings.get();
WriteBufferFromS3 buffer(
settings->client,
bucket,
s3_root_path + SCHEMA_VERSION_OBJECT,
settings->s3_min_upload_part_size,
settings->s3_max_single_part_upload_size);
writeIntText(version, buffer);
buffer.finalize();
}
void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata)
{
auto settings = current_settings.get();
Aws::S3::Model::CopyObjectRequest request;
request.SetCopySource(bucket + "/" + key);
request.SetBucket(bucket);
@ -1008,7 +1032,7 @@ void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & met
request.SetMetadata(metadata);
request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
auto outcome = client->CopyObject(request);
auto outcome = settings->client->CopyObject(request);
throwIfError(outcome);
}
@ -1097,14 +1121,15 @@ void DiskS3::migrateToRestorableSchema()
}
}
bool DiskS3::checkObjectExists(const String & source_bucket, const String & prefix)
bool DiskS3::checkObjectExists(const String & source_bucket, const String & prefix) const
{
auto settings = current_settings.get();
Aws::S3::Model::ListObjectsV2Request request;
request.SetBucket(source_bucket);
request.SetPrefix(prefix);
request.SetMaxKeys(1);
auto outcome = client->ListObjectsV2(request);
auto outcome = settings->client->ListObjectsV2(request);
throwIfError(outcome);
return !outcome.GetResult().GetContents().empty();
@ -1112,12 +1137,13 @@ bool DiskS3::checkObjectExists(const String & source_bucket, const String & pref
bool DiskS3::checkUniqueId(const String & id) const
{
auto settings = current_settings.get();
/// Check that we have the right S3 endpoint and access rights.
/// Actually interprets id as an S3 object name and checks if it exists.
Aws::S3::Model::ListObjectsV2Request request;
request.SetBucket(bucket);
request.SetPrefix(id);
auto resp = client->ListObjectsV2(request);
auto resp = settings->client->ListObjectsV2(request);
throwIfError(resp);
Aws::Vector<Aws::S3::Model::Object> object_list = resp.GetResult().GetContents();
@ -1127,29 +1153,31 @@ bool DiskS3::checkUniqueId(const String & id) const
return false;
}
Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key)
Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key) const
{
auto settings = current_settings.get();
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(source_bucket);
request.SetKey(key);
auto outcome = client->HeadObject(request);
auto outcome = settings->client->HeadObject(request);
throwIfError(outcome);
return outcome.GetResultWithOwnership();
}
void DiskS3::listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback)
void DiskS3::listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback) const
{
auto settings = current_settings.get();
Aws::S3::Model::ListObjectsV2Request request;
request.SetBucket(source_bucket);
request.SetPrefix(source_path);
request.SetMaxKeys(list_object_keys_size);
request.SetMaxKeys(settings->list_object_keys_size);
Aws::S3::Model::ListObjectsV2Outcome outcome;
do
{
outcome = client->ListObjectsV2(request);
outcome = settings->client->ListObjectsV2(request);
throwIfError(outcome);
bool should_continue = callback(outcome.GetResult());
@ -1161,14 +1189,15 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat
} while (outcome.GetResult().GetIsTruncated());
}
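listObjects pages through results until GetIsTruncated() returns false, and the callback can stop the scan early by returning false. The same drive loop against a hypothetical paged API (Page and fetch_page are illustrative stand-ins, not the AWS SDK):

#include <functional>
#include <string>
#include <vector>

/// Hypothetical result of one paged listing request.
struct Page
{
    std::vector<std::string> keys;
    std::string next_token;   /// continuation token; empty on the last page
    bool truncated = false;
};

/// Drive a paged listing until it is exhausted or the callback asks to stop.
void listAll(
    const std::function<Page(const std::string &)> & fetch_page,
    const std::function<bool(const Page &)> & callback)
{
    std::string token;
    Page page;
    do
    {
        page = fetch_page(token);
        if (!callback(page))          /// false means "stop early"
            break;
        token = page.next_token;
    } while (page.truncated);
}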
void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key)
void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const
{
auto settings = current_settings.get();
Aws::S3::Model::CopyObjectRequest request;
request.SetCopySource(src_bucket + "/" + src_key);
request.SetBucket(dst_bucket);
request.SetKey(dst_key);
auto outcome = client->CopyObject(request);
auto outcome = settings->client->CopyObject(request);
throwIfError(outcome);
}
@ -1375,13 +1404,15 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
void DiskS3::restoreFileOperations(const RestoreInformation & restore_information)
{
auto settings = current_settings.get();
LOG_INFO(log, "Starting restore file operations for disk {}", name);
/// Enable recording file operations if we restore to a different bucket / path.
send_metadata = bucket != restore_information.source_bucket || s3_root_path != restore_information.source_path;
bool send_metadata = bucket != restore_information.source_bucket || s3_root_path != restore_information.source_path;
std::set<String> renames;
auto restore_file_operations = [this, &restore_information, &renames](auto list_result)
auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](auto list_result)
{
const String rename = "rename";
const String hardlink = "hardlink";
@ -1413,7 +1444,7 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio
auto to_path = object_metadata["to_path"];
if (exists(from_path))
{
moveFile(from_path, to_path);
moveFile(from_path, to_path, send_metadata);
LOG_DEBUG(log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path);
if (restore_information.detached && isDirectory(to_path))
@ -1440,7 +1471,7 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio
if (exists(src_path))
{
createDirectories(directoryPath(dst_path));
createHardLink(src_path, dst_path);
createHardLink(src_path, dst_path, send_metadata);
LOG_DEBUG(log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path);
}
}
@ -1477,8 +1508,6 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio
}
}
send_metadata = true;
LOG_INFO(log, "File operations restored for disk {}", name);
}
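During restore, each operation key is matched against key_regexp (declared below as ".*/r(\d+)-(\w+).*") to recover the revision string and the operation name. A sketch of that extraction with re2; the sample key is shortened and hypothetical:

#include <iostream>
#include <string>
#include <re2/re2.h>

int main()
{
    static const re2::RE2 key_regexp(".*/r(\\d+)-(\\w+).*");

    std::string revision_str;
    std::string operation;
    std::string key = "disk/operations/r00000000000000101010-rename";  /// hypothetical key

    if (RE2::FullMatch(key, key_regexp, &revision_str, &operation))
        std::cout << "revision bits: " << revision_str << ", operation: " << operation << '\n';
}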
@ -1518,4 +1547,34 @@ void DiskS3::onFreeze(const String & path)
revision_file_buf.finalize();
}
void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextConstPtr context)
{
auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context);
current_settings.set(std::move(new_settings));
if (AsyncExecutor * exec = dynamic_cast<AsyncExecutor*>(&getExecutor()))
exec->setMaxThreads(current_settings.get()->thread_pool_size);
}
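current_settings is a MultiVersion<DiskS3Settings>: every operation takes an immutable snapshot via get(), while applyNewSettings publishes a whole replacement via set(), so in-flight requests keep the settings they started with. A simplified standalone model of that pattern (not ClickHouse's actual MultiVersion implementation):

#include <memory>
#include <mutex>

/// Simplified copy-on-publish holder: readers keep a snapshot alive,
/// writers swap in a complete new version under a short lock.
template <typename T>
class SimpleMultiVersion
{
public:
    std::shared_ptr<const T> get() const
    {
        std::lock_guard lock(mutex);
        return current;   /// snapshot stays valid even after a later set()
    }

    void set(std::unique_ptr<const T> value)
    {
        std::shared_ptr<const T> new_version = std::move(value);
        std::lock_guard lock(mutex);
        current = std::move(new_version);
    }

private:
    mutable std::mutex mutex;
    std::shared_ptr<const T> current;
};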
DiskS3Settings::DiskS3Settings(
const std::shared_ptr<Aws::S3::S3Client> & client_,
size_t s3_max_single_read_retries_,
size_t s3_min_upload_part_size_,
size_t s3_max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
int thread_pool_size_,
int list_object_keys_size_)
: client(client_)
, s3_max_single_read_retries(s3_max_single_read_retries_)
, s3_min_upload_part_size(s3_min_upload_part_size_)
, s3_max_single_part_upload_size(s3_max_single_part_upload_size_)
, min_bytes_for_seek(min_bytes_for_seek_)
, send_metadata(send_metadata_)
, thread_pool_size(thread_pool_size_)
, list_object_keys_size(list_object_keys_size_)
{
}
}


@ -2,9 +2,9 @@
#include <atomic>
#include <common/logger_useful.h>
#include <Common/MultiVersion.h>
#include "Disks/DiskFactory.h"
#include "Disks/Executor.h"
#include "ProxyConfiguration.h"
#include <aws/s3/S3Client.h>
#include <aws/s3/model/HeadObjectResult.h>
@ -17,6 +17,29 @@
namespace DB
{
/// Settings for DiskS3 that can be changed at runtime.
struct DiskS3Settings
{
DiskS3Settings(
const std::shared_ptr<Aws::S3::S3Client> & client_,
size_t s3_max_single_read_retries_,
size_t s3_min_upload_part_size_,
size_t s3_max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
int thread_pool_size_,
int list_object_keys_size_);
std::shared_ptr<Aws::S3::S3Client> client;
size_t s3_max_single_read_retries;
size_t s3_min_upload_part_size;
size_t s3_max_single_part_upload_size;
size_t min_bytes_for_seek;
bool send_metadata;
int thread_pool_size;
int list_object_keys_size;
};
/**
* Storage for persisting data in S3 and metadata on the local disk.
* Files are represented by file in local filesystem (clickhouse_root/disks/disk_name/path/to/file)
@ -27,6 +50,8 @@ class DiskS3 : public IDisk
public:
using ObjectMetadata = std::map<std::string, std::string>;
using Futures = std::vector<std::future<void>>;
using SettingsPtr = std::unique_ptr<DiskS3Settings>;
using GetDiskSettings = std::function<SettingsPtr(const Poco::Util::AbstractConfiguration &, const String, ContextConstPtr)>;
friend class DiskS3Reservation;
@ -36,18 +61,11 @@ public:
DiskS3(
String name_,
std::shared_ptr<Aws::S3::S3Client> client_,
std::shared_ptr<S3::ProxyConfiguration> proxy_configuration_,
String bucket_,
String s3_root_path_,
String metadata_path_,
UInt64 s3_max_single_read_retries_,
size_t min_upload_part_size_,
size_t max_single_part_upload_size_,
size_t min_bytes_for_seek_,
bool send_metadata_,
int thread_pool_size_,
int list_object_keys_size_);
SettingsPtr settings_,
GetDiskSettings settings_getter_);
const String & getName() const override { return name; }
@ -82,7 +100,7 @@ public:
DiskDirectoryIteratorPtr iterateDirectory(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void moveFile(const String & from_path, const String & to_path, bool send_metadata);
void replaceFile(const String & from_path, const String & to_path) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
@ -109,6 +127,7 @@ public:
void removeSharedRecursive(const String & path, bool keep_s3) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void createHardLink(const String & src_path, const String & dst_path, bool send_metadata);
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
@ -122,6 +141,8 @@ public:
void shutdown() override;
void startup() override;
/// Return some unique string for the file.
/// Required to distinguish different copies of the same part on S3.
String getUniqueId(const String & path) const override;
@ -130,15 +151,11 @@ public:
/// Required for S3 to ensure that the replica has access to data written by another node.
bool checkUniqueId(const String & id) const override;
/// Actions performed after disk creation.
void startup();
/// Restore S3 metadata files on file system.
void restore();
/// Dumps current revision counter into file 'revision.txt' at given path.
void onFreeze(const String & path) override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextConstPtr context) override;
private:
bool tryReserve(UInt64 bytes);
@ -146,6 +163,7 @@ private:
void removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys);
void removeAws(const AwsS3KeyKeeper & keys);
Metadata readOrCreateMetaForWriting(const String & path, WriteMode mode);
Metadata readMeta(const String & path) const;
Metadata createMeta(const String & path) const;
@ -153,7 +171,7 @@ private:
/// Converts revision to binary string with leading zeroes (64 bit).
static String revisionToString(UInt64 revision);
bool checkObjectExists(const String & source_bucket, const String & prefix);
bool checkObjectExists(const String & source_bucket, const String & prefix) const;
void findLastRevision();
int readSchemaVersion(const String & source_bucket, const String & source_path);
@ -163,10 +181,12 @@ private:
void migrateToRestorableSchemaRecursive(const String & path, Futures & results);
void migrateToRestorableSchema();
Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key);
void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback);
void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key);
Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const;
void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback) const;
void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const;
/// Restore S3 metadata files on file system.
void restore();
void readRestoreInformation(RestoreInformation & restore_information);
void restoreFiles(const RestoreInformation & restore_information);
void processRestoreFiles(const String & source_bucket, const String & source_path, std::vector<String> keys);
@ -181,29 +201,24 @@ private:
static String pathToDetached(const String & source_path);
const String name;
std::shared_ptr<Aws::S3::S3Client> client;
std::shared_ptr<S3::ProxyConfiguration> proxy_configuration;
const String bucket;
const String s3_root_path;
String metadata_path;
UInt64 s3_max_single_read_retries;
size_t min_upload_part_size;
size_t max_single_part_upload_size;
size_t min_bytes_for_seek;
bool send_metadata;
const String metadata_path;
MultiVersion<DiskS3Settings> current_settings;
/// Gets disk settings from context.
GetDiskSettings settings_getter;
UInt64 reserved_bytes = 0;
UInt64 reservation_count = 0;
std::mutex reservation_mutex;
std::atomic<UInt64> revision_counter;
std::atomic<UInt64> revision_counter = 0;
static constexpr UInt64 LATEST_REVISION = std::numeric_limits<UInt64>::max();
static constexpr UInt64 UNKNOWN_REVISION = 0;
/// File at path {metadata_path}/restore contains metadata restore information
inline static const String RESTORE_FILE_NAME = "restore";
/// The number of keys listed in one request (1000 is max value)
int list_object_keys_size;
/// Key has format: ../../r{revision}-{operation}
const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+).*"};


@ -19,6 +19,7 @@
#include "ProxyConfiguration.h"
#include "ProxyListConfiguration.h"
#include "ProxyResolverConfiguration.h"
#include "Disks/DiskRestartProxy.h"
namespace DB
@ -31,84 +32,132 @@ namespace ErrorCodes
namespace
{
void checkWriteAccess(IDisk & disk)
{
auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
file->write("test", 4);
}
void checkWriteAccess(IDisk & disk)
{
auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
file->write("test", 4);
}
void checkReadAccess(const String & disk_name, IDisk & disk)
{
auto file = disk.readFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE);
String buf(4, '0');
file->readStrict(buf.data(), 4);
if (buf != "test")
throw Exception("No read access to S3 bucket in disk " + disk_name, ErrorCodes::PATH_ACCESS_DENIED);
}
void checkReadAccess(const String & disk_name, IDisk & disk)
{
auto file = disk.readFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE);
String buf(4, '0');
file->readStrict(buf.data(), 4);
if (buf != "test")
throw Exception("No read access to S3 bucket in disk " + disk_name, ErrorCodes::PATH_ACCESS_DENIED);
}
void checkRemoveAccess(IDisk & disk) { disk.removeFile("test_acl"); }
void checkRemoveAccess(IDisk & disk) { disk.removeFile("test_acl"); }
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config)
{
auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint"));
auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
}
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
}
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
std::vector<String> keys;
proxy_config.keys(prefix, keys);
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config)
{
std::vector<String> keys;
proxy_config.keys(prefix, keys);
std::vector<Poco::URI> proxies;
for (const auto & key : keys)
if (startsWith(key, "uri"))
{
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
if (proxy_uri.getHost().empty())
throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
proxies.push_back(proxy_uri);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString());
}
if (!proxies.empty())
return std::make_shared<S3::ProxyListConfiguration>(proxies);
return nullptr;
}
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
if (!config.has(prefix + ".proxy"))
return nullptr;
std::vector<String> config_keys;
config.keys(prefix + ".proxy", config_keys);
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
std::vector<Poco::URI> proxies;
for (const auto & key : keys)
if (startsWith(key, "uri"))
{
if (resolver_configs > 1)
throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS);
Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key));
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
if (proxy_uri.getHost().empty())
throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS);
proxies.push_back(proxy_uri);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString());
}
return getProxyListConfiguration(prefix + ".proxy", config);
if (!proxies.empty())
return std::make_shared<S3::ProxyListConfiguration>(proxies);
return nullptr;
}
std::shared_ptr<S3::ProxyConfiguration> getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config)
{
if (!config.has(prefix + ".proxy"))
return nullptr;
std::vector<String> config_keys;
config.keys(prefix + ".proxy", config_keys);
if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver"))
{
if (resolver_configs > 1)
throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS);
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config);
}
return getProxyListConfiguration(prefix + ".proxy", config);
}
std::shared_ptr<Aws::S3::S3Client>
getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextConstPtr context)
{
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects);
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
if (uri.key.back() != '/')
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
client_configuration.perRequestConfiguration
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
client_configuration.retryStrategy
= std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));
return S3::ClientFactory::instance().create(
client_configuration,
uri.is_virtual_hosted_style,
config.getString(config_prefix + ".access_key_id", ""),
config.getString(config_prefix + ".secret_access_key", ""),
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
{},
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)),
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)));
}
std::unique_ptr<DiskS3Settings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextConstPtr context)
{
return std::make_unique<DiskS3Settings>(
getClient(config, config_prefix, context),
config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries),
config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size),
config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size),
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getBool(config_prefix + ".send_metadata", false),
config.getInt(config_prefix + ".thread_pool_size", 16),
config.getInt(config_prefix + ".list_object_keys_size", 1000));
}
}
@ -118,56 +167,20 @@ void registerDiskS3(DiskFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextConstPtr context) -> DiskPtr {
Poco::File disk{context->getPath() + "disks/" + name};
disk.createDirectories();
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
context->getRemoteHostFilter(),
context->getGlobalContext()->getSettingsRef().s3_max_redirects);
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
if (uri.key.back() != '/')
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
client_configuration.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(
config.getUInt(config_prefix + ".retry_attempts", 10));
auto client = S3::ClientFactory::instance().create(
client_configuration,
uri.is_virtual_hosted_style,
config.getString(config_prefix + ".access_key_id", ""),
config.getString(config_prefix + ".secret_access_key", ""),
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
{},
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)),
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))
);
String metadata_path = config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/");
Poco::File (metadata_path).createDirectories();
auto s3disk = std::make_shared<DiskS3>(
std::shared_ptr<IDisk> s3disk = std::make_shared<DiskS3>(
name,
client,
proxy_config,
uri.bucket,
uri.key,
metadata_path,
context->getSettingsRef().s3_max_single_read_retries,
context->getSettingsRef().s3_min_upload_part_size,
context->getSettingsRef().s3_max_single_part_upload_size,
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getBool(config_prefix + ".send_metadata", false),
config.getInt(config_prefix + ".thread_pool_size", 16),
config.getInt(config_prefix + ".list_object_keys_size", 1000));
getSettings(config, config_prefix, context),
getSettings);
/// This code is used only to check access to the corresponding disk.
if (!config.getBool(config_prefix + ".skip_access_check", false))
@ -177,7 +190,6 @@ void registerDiskS3(DiskFactory & factory)
checkRemoveAccess(*s3disk);
}
s3disk->restore();
s3disk->startup();
bool cache_enabled = config.getBool(config_prefix + ".cache_enabled", true);
@ -197,10 +209,10 @@ void registerDiskS3(DiskFactory & factory)
|| path.ends_with("txt") || path.ends_with("dat");
};
return std::make_shared<DiskCacheWrapper>(s3disk, cache_disk, cache_file_predicate);
s3disk = std::make_shared<DiskCacheWrapper>(s3disk, cache_disk, cache_file_predicate);
}
return s3disk;
return std::make_shared<DiskRestartProxy>(s3disk);
};
factory.registerDiskType("s3", creator);
}


@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F S3 | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | grep -v -F S3 | sed 's/^\.\// /' | sort ?>
)
END()


@ -10,7 +10,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()


@ -149,8 +149,7 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & a
const NullMap & src_null_map = assert_cast<const ColumnUInt8 &>(*null_map_column).getData();
for (size_t i = 0, size = result_null_map.size(); i < size; ++i)
if (src_null_map[i])
result_null_map[i] = 1;
result_null_map[i] |= src_null_map[i];
result_null_map_column = std::move(mutable_result_null_map_column);
}
@ -179,10 +178,8 @@ NullPresence getNullPresense(const ColumnsWithTypeAndName & args)
for (const auto & elem : args)
{
if (!res.has_nullable)
res.has_nullable = elem.type->isNullable();
if (!res.has_null_constant)
res.has_null_constant = elem.type->onlyNull();
res.has_nullable |= elem.type->isNullable();
res.has_null_constant |= elem.type->onlyNull();
}
return res;
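Both rewrites above replace a conditional store with an unconditional bitwise OR: the flag accumulates without a branch, and the result is identical because the maps hold only 0 or 1. The idiom in isolation:

#include <cstddef>
#include <cstdint>
#include <vector>

/// Merge a source null-map into the result without branching:
/// once a position is marked null (1), it stays null.
void mergeNullMaps(std::vector<uint8_t> & result, const std::vector<uint8_t> & src)
{
    for (size_t i = 0; i < result.size() && i < src.size(); ++i)
        result[i] |= src[i];
}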


@ -15,6 +15,8 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int DECIMAL_OVERFLOW;
extern const int ARGUMENT_OUT_OF_BOUND;
}
enum class AggregateOperation
@ -22,7 +24,8 @@ enum class AggregateOperation
min,
max,
sum,
average
average,
product
};
/**
@ -54,6 +57,12 @@ struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::average>
using Result = Float64;
};
template <typename ArrayElement>
struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::product>
{
using Result = Float64;
};
template <typename ArrayElement>
struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::sum>
{
@ -86,7 +95,7 @@ struct ArrayAggregateImpl
using Types = std::decay_t<decltype(types)>;
using DataType = typename Types::LeftType;
if constexpr (aggregate_operation == AggregateOperation::average)
if constexpr (aggregate_operation == AggregateOperation::average || aggregate_operation == AggregateOperation::product)
{
result = std::make_shared<DataTypeFloat64>();
@ -124,17 +133,17 @@ struct ArrayAggregateImpl
template <typename Element>
static NO_SANITIZE_UNDEFINED bool executeType(const ColumnPtr & mapped, const ColumnArray::Offsets & offsets, ColumnPtr & res_ptr)
{
using Result = ArrayAggregateResult<Element, aggregate_operation>;
using ResultType = ArrayAggregateResult<Element, aggregate_operation>;
using ColVecType = std::conditional_t<IsDecimalNumber<Element>, ColumnDecimal<Element>, ColumnVector<Element>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<Result>, ColumnDecimal<Result>, ColumnVector<Result>>;
using ColVecResultType = std::conditional_t<IsDecimalNumber<ResultType>, ColumnDecimal<ResultType>, ColumnVector<ResultType>>;
/// For average of array we return Float64 as result, but we want to keep precision
/// so we convert to Float64 as last step, but intermediate sum is represented as result of sum operation
static constexpr bool is_average_operation = aggregate_operation == AggregateOperation::average;
/// For the average and product of an array we return Float64 as the result, but we want to keep precision,
/// so we convert to Float64 only as the last step; the intermediate value uses the result type of the sum operation.
static constexpr bool is_average_or_product_operation = aggregate_operation == AggregateOperation::average ||
aggregate_operation == AggregateOperation::product;
using SummAggregationType = ArrayAggregateResult<Element, AggregateOperation::sum>;
using AggregationType = std::conditional_t<is_average_operation, SummAggregationType, Result>;
using AggregationType = std::conditional_t<is_average_or_product_operation, SummAggregationType, ResultType>;
const ColVecType * column = checkAndGetColumn<ColVecType>(&*mapped);
@ -147,18 +156,15 @@ struct ArrayAggregateImpl
return false;
const AggregationType x = column_const->template getValue<Element>(); // NOLINT
const typename ColVecType::Container & data
= checkAndGetColumn<ColVecType>(&column_const->getDataColumn())->getData();
const auto & data = checkAndGetColumn<ColVecType>(&column_const->getDataColumn())->getData();
typename ColVecResult::MutablePtr res_column;
typename ColVecResultType::MutablePtr res_column;
if constexpr (IsDecimalNumber<Element>)
{
res_column = ColVecResult::create(offsets.size(), data.getScale());
}
res_column = ColVecResultType::create(offsets.size(), data.getScale());
else
res_column = ColVecResult::create(offsets.size());
res_column = ColVecResultType::create(offsets.size());
typename ColVecResult::Container & res = res_column->getData();
auto & res = res_column->getData();
size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
@ -178,13 +184,45 @@ struct ArrayAggregateImpl
{
if constexpr (IsDecimalNumber<Element>)
{
res[i] = DecimalUtils::convertTo<Result>(x, data.getScale());
res[i] = DecimalUtils::convertTo<ResultType>(x, data.getScale());
}
else
{
res[i] = x;
}
}
else if constexpr (aggregate_operation == AggregateOperation::product)
{
size_t array_size = offsets[i] - pos;
AggregationType product = x;
if constexpr (IsDecimalNumber<Element>)
{
using T = decltype(x.value);
T x_val = x.value;
for (size_t array_index = 1; array_index < array_size; ++array_index)
{
T product_val = product.value;
if (common::mulOverflow(x_val, product_val, product.value))
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
}
auto result_scale = data.getScale() * array_size;
if (unlikely(result_scale > DecimalUtils::max_precision<AggregationType>))
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
res[i] = DecimalUtils::convertTo<ResultType>(product, data.getScale() * array_size);
}
else
{
for (size_t array_index = 1; array_index < array_size; ++array_index)
product = product * x;
res[i] = product;
}
}
pos = offsets[i];
}
@ -193,30 +231,30 @@ struct ArrayAggregateImpl
return true;
}
const typename ColVecType::Container & data = column->getData();
const auto & data = column->getData();
typename ColVecResult::MutablePtr res_column;
typename ColVecResultType::MutablePtr res_column;
if constexpr (IsDecimalNumber<Element>)
res_column = ColVecResult::create(offsets.size(), data.getScale());
res_column = ColVecResultType::create(offsets.size(), data.getScale());
else
res_column = ColVecResult::create(offsets.size());
res_column = ColVecResultType::create(offsets.size());
typename ColVecResult::Container & res = res_column->getData();
typename ColVecResultType::Container & res = res_column->getData();
size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
{
AggregationType s = 0;
AggregationType aggregate_value = 0;
/// Array is empty
if (offsets[i] == pos)
{
res[i] = s;
res[i] = aggregate_value;
continue;
}
size_t count = 1;
s = data[pos]; // NOLINT
aggregate_value = data[pos]; // NOLINT
++pos;
for (; pos < offsets[i]; ++pos)
@ -226,20 +264,36 @@ struct ArrayAggregateImpl
if constexpr (aggregate_operation == AggregateOperation::sum ||
aggregate_operation == AggregateOperation::average)
{
s += element;
aggregate_value += element;
}
else if constexpr (aggregate_operation == AggregateOperation::min)
{
if (element < s)
if (element < aggregate_value)
{
s = element;
aggregate_value = element;
}
}
else if constexpr (aggregate_operation == AggregateOperation::max)
{
if (element > s)
if (element > aggregate_value)
{
s = element;
aggregate_value = element;
}
}
else if constexpr (aggregate_operation == AggregateOperation::product)
{
if constexpr (IsDecimalNumber<Element>)
{
using AggregateValueDecimalUnderlyingValue = decltype(aggregate_value.value);
AggregateValueDecimalUnderlyingValue current_aggregate_value = aggregate_value.value;
AggregateValueDecimalUnderlyingValue element_value = static_cast<AggregateValueDecimalUnderlyingValue>(element.value);
if (common::mulOverflow(current_aggregate_value, element_value, aggregate_value.value))
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
}
else
{
aggregate_value *= element;
}
}
@ -250,17 +304,26 @@ struct ArrayAggregateImpl
{
if constexpr (IsDecimalNumber<Element>)
{
s = s / count;
res[i] = DecimalUtils::convertTo<Result>(s, data.getScale());
aggregate_value = aggregate_value / count;
res[i] = DecimalUtils::convertTo<ResultType>(aggregate_value, data.getScale());
}
else
{
res[i] = static_cast<Result>(s) / count;
res[i] = static_cast<ResultType>(aggregate_value) / count;
}
}
else if constexpr (aggregate_operation == AggregateOperation::product && IsDecimalNumber<Element>)
{
auto result_scale = data.getScale() * count;
if (unlikely(result_scale > DecimalUtils::max_precision<AggregationType>))
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
res[i] = DecimalUtils::convertTo<ResultType>(aggregate_value, result_scale);
}
else
{
res[i] = s;
res[i] = aggregate_value;
}
}
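The decimal branch above guards against two hazards: the raw integer product can overflow, and the result scale grows with every factor. A decimal with scale s is stored as value * 10^s, so multiplying n such values yields a raw integer at scale n*s (for example, 1.5 * 1.5 is stored as 15 * 15 = 225 at scale 2, i.e. 2.25). A standalone sketch of the checked accumulation, using the compiler builtin that common::mulOverflow presumably wraps for built-in integer types (an assumption about its implementation):

#include <cstdint>
#include <stdexcept>
#include <vector>

/// Multiply raw decimal values (value * 10^scale representation) with overflow checks.
/// The scale of the product is the sum of the factors' scales.
int64_t checkedDecimalProduct(
    const std::vector<int64_t> & raw_values, uint32_t scale, uint32_t max_scale, uint32_t & result_scale)
{
    int64_t product = 1;
    for (int64_t v : raw_values)
        if (__builtin_mul_overflow(product, v, &product))
            throw std::overflow_error("Decimal math overflow");

    result_scale = scale * static_cast<uint32_t>(raw_values.size());
    if (result_scale > max_scale)
        throw std::out_of_range("Scale is out of bounds");
    return product;   /// interpret as product * 10^-result_scale
}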
@ -291,7 +354,7 @@ struct ArrayAggregateImpl
executeType<Decimal128>(mapped, offsets, res))
return res;
else
throw Exception("Unexpected column for arraySum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arraySum: {}", mapped->getName());
}
};
@ -307,12 +370,16 @@ using FunctionArraySum = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperati
struct NameArrayAverage { static constexpr auto name = "arrayAvg"; };
using FunctionArrayAverage = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperation::average>, NameArrayAverage>;
struct NameArrayProduct { static constexpr auto name = "arrayProduct"; };
using FunctionArrayProduct = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperation::product>, NameArrayProduct>;
void registerFunctionArrayAggregation(FunctionFactory & factory)
{
factory.registerFunction<FunctionArrayMin>();
factory.registerFunction<FunctionArrayMax>();
factory.registerFunction<FunctionArraySum>();
factory.registerFunction<FunctionArrayAverage>();
factory.registerFunction<FunctionArrayProduct>();
}
}


@ -35,7 +35,7 @@ PEERDIR(
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
SRCS(
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|abtesting' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|abtesting' | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()


@ -9,6 +9,8 @@
#if defined(__OpenBSD__) || defined(__FreeBSD__) || defined (__ANDROID__)
# include <sys/endian.h>
#elif defined(__sun)
# include <endian.h>
#elif defined(__APPLE__)
# include <libkern/OSByteOrder.h>


@ -24,9 +24,9 @@ struct ConnectionTimeouts
ConnectionTimeouts() = default;
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
const Poco::Timespan & send_timeout_,
const Poco::Timespan & receive_timeout_)
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_)
: connection_timeout(connection_timeout_),
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
@ -38,10 +38,10 @@ struct ConnectionTimeouts
{
}
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
const Poco::Timespan & send_timeout_,
const Poco::Timespan & receive_timeout_,
const Poco::Timespan & tcp_keep_alive_timeout_)
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_)
: connection_timeout(connection_timeout_),
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
@ -52,11 +52,11 @@ struct ConnectionTimeouts
receive_data_timeout(receive_timeout_)
{
}
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
const Poco::Timespan & send_timeout_,
const Poco::Timespan & receive_timeout_,
const Poco::Timespan & tcp_keep_alive_timeout_,
const Poco::Timespan & http_keep_alive_timeout_)
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_)
: connection_timeout(connection_timeout_),
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
@ -68,14 +68,14 @@ struct ConnectionTimeouts
{
}
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
const Poco::Timespan & send_timeout_,
const Poco::Timespan & receive_timeout_,
const Poco::Timespan & tcp_keep_alive_timeout_,
const Poco::Timespan & http_keep_alive_timeout_,
const Poco::Timespan & secure_connection_timeout_,
const Poco::Timespan & receive_hello_timeout_,
const Poco::Timespan & receive_data_timeout_)
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_,
Poco::Timespan secure_connection_timeout_,
Poco::Timespan receive_hello_timeout_,
Poco::Timespan receive_data_timeout_)
: connection_timeout(connection_timeout_),
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
@ -87,7 +87,7 @@ struct ConnectionTimeouts
{
}
static Poco::Timespan saturate(const Poco::Timespan & timespan, const Poco::Timespan & limit)
static Poco::Timespan saturate(Poco::Timespan timespan, Poco::Timespan limit)
{
if (limit.totalMicroseconds() == 0)
return timespan;
@ -95,7 +95,7 @@ struct ConnectionTimeouts
return (timespan > limit) ? limit : timespan;
}
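saturate treats a zero limit as "no limit" and otherwise clamps the timeout toward the limit; getSaturated below simply applies it member by member. A tiny usage sketch restating that behavior (assumes Poco headers are available):

#include <Poco/Timespan.h>

Poco::Timespan saturateTimespan(Poco::Timespan timespan, Poco::Timespan limit)
{
    if (limit.totalMicroseconds() == 0)
        return timespan;                            /// zero limit: leave unchanged
    return (timespan > limit) ? limit : timespan;   /// otherwise clamp
}

/// saturateTimespan(Poco::Timespan(30, 0), Poco::Timespan(10, 0)) -> 10 seconds
/// saturateTimespan(Poco::Timespan(30, 0), Poco::Timespan(0, 0))  -> 30 seconds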
ConnectionTimeouts getSaturated(const Poco::Timespan & limit) const
ConnectionTimeouts getSaturated(Poco::Timespan limit) const
{
return ConnectionTimeouts(saturate(connection_timeout, limit),
saturate(send_timeout, limit),


@ -0,0 +1,46 @@
#include <IO/ReadBufferFromFileDecorator.h>
namespace DB
{
ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr<ReadBufferFromFileBase> impl_)
: impl(std::move(impl_))
{
swap(*impl);
}
std::string ReadBufferFromFileDecorator::getFileName() const
{
return impl->getFileName();
}
off_t ReadBufferFromFileDecorator::getPosition()
{
swap(*impl);
auto position = impl->getPosition();
swap(*impl);
return position;
}
off_t ReadBufferFromFileDecorator::seek(off_t off, int whence)
{
swap(*impl);
auto result = impl->seek(off, whence);
swap(*impl);
return result;
}
bool ReadBufferFromFileDecorator::nextImpl()
{
swap(*impl);
auto result = impl->next();
swap(*impl);
return result;
}
}


@ -0,0 +1,27 @@
#pragma once
#include <IO/ReadBufferFromFileBase.h>
namespace DB
{
/// Delegates all reads to the underlying buffer. Doesn't have memory of its own.
class ReadBufferFromFileDecorator : public ReadBufferFromFileBase
{
public:
explicit ReadBufferFromFileDecorator(std::unique_ptr<ReadBufferFromFileBase> impl_);
std::string getFileName() const override;
off_t getPosition() override;
off_t seek(off_t off, int whence) override;
bool nextImpl() override;
protected:
std::unique_ptr<ReadBufferFromFileBase> impl;
};
}
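The decorator owns no buffer memory: the constructor swaps internal buffer state with impl, and every delegated call swaps again before and after, so whichever object is currently active sees the working memory and position. A minimal standalone model of that handoff (BufferState is a hypothetical stand-in for the real buffer base state):

#include <utility>

/// Hypothetical working state of a buffer: memory region plus read position.
struct BufferState
{
    char * begin = nullptr;
    char * pos = nullptr;
    char * end = nullptr;

    void swapState(BufferState & other)
    {
        std::swap(begin, other.begin);
        std::swap(pos, other.pos);
        std::swap(end, other.end);
    }
};

/// The facade hands its state to impl, lets impl work, then takes it back.
struct Facade : BufferState
{
    BufferState impl;

    bool next()
    {
        swapState(impl);            /// impl now sees the current memory/position
        bool result = implNext();   /// stand-in for impl->next()
        swapState(impl);            /// the facade takes the updated state back
        return result;
    }

    bool implNext() { return impl.pos != impl.end; }   /// placeholder for real work
};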


@ -8,7 +8,7 @@
namespace DB
{
using AsyncCallback = std::function<void(int, const Poco::Timespan &, const std::string &)>;
using AsyncCallback = std::function<void(int, Poco::Timespan, const std::string &)>;
/// Works with the ready Poco::Net::Socket. Blocking operations.
class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer>


@ -4,28 +4,10 @@
namespace DB
{
SeekAvoidingReadBuffer::SeekAvoidingReadBuffer(std::unique_ptr<ReadBufferFromFileBase> nested_, UInt64 min_bytes_for_seek_)
: nested(std::move(nested_))
SeekAvoidingReadBuffer::SeekAvoidingReadBuffer(std::unique_ptr<ReadBufferFromFileBase> impl_, UInt64 min_bytes_for_seek_)
: ReadBufferFromFileDecorator(std::move(impl_))
, min_bytes_for_seek(min_bytes_for_seek_)
{
swap(*nested);
}
std::string SeekAvoidingReadBuffer::getFileName() const
{
return nested->getFileName();
}
off_t SeekAvoidingReadBuffer::getPosition()
{
swap(*nested);
off_t position = nested->getPosition();
swap(*nested);
return position;
}
{ }
off_t SeekAvoidingReadBuffer::seek(off_t off, int whence)
{
@ -39,28 +21,13 @@ off_t SeekAvoidingReadBuffer::seek(off_t off, int whence)
if (whence == SEEK_SET && off >= position && off < position + static_cast<off_t>(min_bytes_for_seek))
{
swap(*nested);
nested->ignore(off - position);
swap(*nested);
position = off;
}
else
{
swap(*nested);
position = nested->seek(off, whence);
swap(*nested);
swap(*impl);
impl->ignore(off - position);
swap(*impl);
return off;
}
return position;
}
bool SeekAvoidingReadBuffer::nextImpl()
{
swap(*nested);
bool nested_result = nested->next();
swap(*nested);
return nested_result;
return ReadBufferFromFileDecorator::seek(off, whence);
}
}
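The rewritten seek keeps a single rule: a forward jump that lands less than min_bytes_for_seek ahead of the current position is served by reading and discarding bytes via ignore, which is cheaper than a real seek on remote storage; every other case falls through to the decorated buffer. The decision in isolation:

#include <cstdint>

/// True when a forward jump should be handled by reading-and-discarding
/// instead of issuing a real (expensive) seek on the underlying source.
bool shouldIgnoreInsteadOfSeek(int64_t position, int64_t target, uint64_t min_bytes_for_seek)
{
    return target >= position
        && target < position + static_cast<int64_t>(min_bytes_for_seek);
}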

Some files were not shown because too many files have changed in this diff.