Merge branch 'ClickHouse:master' into translation-docs-zh

This commit is contained in:
qianmoQ 2021-08-27 19:33:10 +08:00 committed by GitHub
commit af84be6009
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
228 changed files with 4872 additions and 1085 deletions

View File

@ -127,12 +127,13 @@ if (USE_STATIC_LIBRARIES)
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
endif ()
# Implies ${WITH_COVERAGE}
option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
if (ENABLE_FUZZING)
# Also set WITH_COVERAGE=1 for better fuzzing process
# By default this is disabled, because fuzzers are built in CI with the clickhouse itself.
# And we don't want to enable coverage for it.
message (STATUS "Fuzzing instrumentation enabled")
set (WITH_COVERAGE ON)
set (FUZZER "libfuzzer")
endif()

View File

@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse August Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/279109379/) on 25 August 2021.

View File

@ -1,4 +1,5 @@
#include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ
#include <errno.h>
@ -17,18 +18,7 @@ static size_t __find_auxv(unsigned long type)
return (size_t) -1;
}
__attribute__((constructor)) static void __auxv_init()
{
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
}
unsigned long getauxval(unsigned long type)
unsigned long __getauxval(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
@ -43,3 +33,38 @@ unsigned long getauxval(unsigned long type)
errno = ENOENT;
return 0;
}
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// That's normally occurred only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
return __getauxval(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
unsigned long getauxval(unsigned long type)
{
return ((unsigned long (*)(unsigned long))getauxval_func)(type);
}

View File

@ -80,7 +80,9 @@ PoolWithFailover::PoolWithFailover(
const std::string & password,
unsigned default_connections_,
unsigned max_connections_,
size_t max_tries_)
size_t max_tries_,
size_t connect_timeout,
size_t rw_timeout)
: max_tries(max_tries_)
, shareable(false)
{
@ -90,8 +92,8 @@ PoolWithFailover::PoolWithFailover(
replicas_by_priority[0].emplace_back(std::make_shared<Pool>(database,
host, user, password, port,
/* socket_ = */ "",
MYSQLXX_DEFAULT_TIMEOUT,
MYSQLXX_DEFAULT_RW_TIMEOUT,
connect_timeout,
rw_timeout,
default_connections_,
max_connections_));
}
@ -130,7 +132,6 @@ PoolWithFailover::Entry PoolWithFailover::get()
for (size_t try_no = 0; try_no < max_tries; ++try_no)
{
full_pool = nullptr;
for (auto & priority_replicas : replicas_by_priority)
{
Replicas & replicas = priority_replicas.second;

View File

@ -117,7 +117,9 @@ namespace mysqlxx
const std::string & password,
unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES);
size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
size_t connect_timeout = MYSQLXX_DEFAULT_TIMEOUT,
size_t rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT);
PoolWithFailover(const PoolWithFailover & other);

View File

@ -22,6 +22,7 @@ set (SRCS
"${LIBRARY_DIR}/src/transaction.cxx"
"${LIBRARY_DIR}/src/transaction_base.cxx"
"${LIBRARY_DIR}/src/row.cxx"
"${LIBRARY_DIR}/src/params.cxx"
"${LIBRARY_DIR}/src/util.cxx"
"${LIBRARY_DIR}/src/version.cxx"
)
@ -31,6 +32,7 @@ set (SRCS
# conflicts with all includes of <array>.
set (HDRS
"${LIBRARY_DIR}/include/pqxx/array.hxx"
"${LIBRARY_DIR}/include/pqxx/params.hxx"
"${LIBRARY_DIR}/include/pqxx/binarystring.hxx"
"${LIBRARY_DIR}/include/pqxx/composite.hxx"
"${LIBRARY_DIR}/include/pqxx/connection.hxx"
@ -75,4 +77,3 @@ set(CM_CONFIG_PQ "${LIBRARY_DIR}/include/pqxx/config-internal-libpq.h")
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_INT}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PUB}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PQ}" @ONLY)

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 6b816d2fba3991f8fd6aaec17d92f68947eab667
Subproject commit c4ea9848a697747dfa35325af9b3452f30841685

View File

@ -83,6 +83,16 @@ then
mv "$COMBINED_OUTPUT.tgz" /output
fi
# Also build fuzzers if any sanitizer specified
if [ -n "$SANITIZER" ]
then
# Currently we are in build/build_docker directory
../docker/packager/other/fuzzer.sh
fi
ccache --show-config ||:
ccache --show-stats ||:
if [ "${CCACHE_DEBUG:-}" == "1" ]
then
find . -name '*.ccache-*' -print0 \
@ -95,4 +105,3 @@ then
# files in place, and will fail because this directory is not writable.
tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE"
fi

View File

@ -23,12 +23,24 @@ then
echo "Place $BINARY_OUTPUT to output"
mkdir /output/binary ||: # if exists
mv /build/obj-*/programs/clickhouse* /output/binary
if [ "$BINARY_OUTPUT" = "tests" ]
then
mv /build/obj-*/src/unit_tests_dbms /output/binary
fi
fi
# Also build fuzzers if any sanitizer specified
if [ -n "$SANITIZER" ]
then
# Script is supposed that we are in build directory.
mkdir -p build/build_docker
cd build/build_docker
# Launching build script
../docker/packager/other/fuzzer.sh
cd
fi
ccache --show-config ||:
ccache --show-stats ||:

35
docker/packager/other/fuzzer.sh Executable file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# This script is responsible for building all fuzzers, and copy them to output directory
# as an archive.
# Script is supposed that we are in build directory.
set -x -e
printenv
# Delete previous cache, because we add a new flags -DENABLE_FUZZING=1 and -DFUZZER=libfuzzer
rm -f CMakeCache.txt
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
# Hope, that the most part of files will be in cache, so we just link new executables
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \
-DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \
-DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \
-DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \
-DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ')
mkdir -p /output/fuzzers
for FUZZER_TARGET in $FUZZER_TARGETS
do
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS $FUZZER_TARGET
# Find this binary in build directory and strip it
FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET")
strip --strip-unneeded "$FUZZER_PATH"
mv "$FUZZER_PATH" /output/fuzzers
done
tar -zcvf /output/fuzzers.tar.gz /output/fuzzers
rm -rf /output/fuzzers

View File

@ -105,6 +105,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if image_type == "deb" or image_type == "unbundled":
result.append("DEB_CC={}".format(cc))
result.append("DEB_CXX={}".format(cxx))
# For building fuzzers
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))
elif image_type == "binary":
result.append("CC={}".format(cc))
result.append("CXX={}".format(cxx))

View File

@ -396,6 +396,9 @@ function run_tests
# needs s3
01944_insert_partition_by
# depends on Go
02013_zlib_read_after_eof
)
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \

View File

@ -16,6 +16,8 @@ RUN apt-get update \
p7zip-full \
parallel \
psmisc \
python3 \
python3-pip \
rsync \
tree \
tzdata \
@ -25,6 +27,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN pip3 install Jinja2
COPY * /
SHELL ["/bin/bash", "-c"]

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python3
from argparse import ArgumentParser
import os
import jinja2
def removesuffix(text, suffix):
"""
Added in python 3.9
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
return text[:-len(suffix)]
else:
return text[:]
def render_test_template(j2env, suite_dir, test_name):
"""
Render template for test and reference file if needed
"""
test_base_name = removesuffix(test_name, ".sql.j2")
reference_file_name = test_base_name + ".reference.j2"
reference_file_path = os.path.join(suite_dir, reference_file_name)
if os.path.isfile(reference_file_path):
tpl = j2env.get_template(reference_file_name)
tpl.stream().dump(os.path.join(suite_dir, test_base_name) + ".gen.reference")
if test_name.endswith(".sql.j2"):
tpl = j2env.get_template(test_name)
generated_test_name = test_base_name + ".gen.sql"
tpl.stream().dump(os.path.join(suite_dir, generated_test_name))
return generated_test_name
return test_name
def main(args):
suite_dir = args.path
print(f"Scanning {suite_dir} directory...")
j2env = jinja2.Environment(
loader=jinja2.FileSystemLoader(suite_dir),
keep_trailing_newline=True,
)
test_names = os.listdir(suite_dir)
for test_name in test_names:
if not test_name.endswith(".sql.j2"):
continue
new_name = render_test_template(j2env, suite_dir, test_name)
print(f"File {new_name} generated")
if __name__ == "__main__":
parser = ArgumentParser(description="Jinja2 test generator")
parser.add_argument("-p", "--path", help="Path to test dir", required=True)
main(parser.parse_args())

View File

@ -71,12 +71,12 @@ function watchdog
kill -9 -- $fuzzer_pid ||:
}
function filter_exists
function filter_exists_and_template
{
local path
for path in "$@"; do
if [ -e "$path" ]; then
echo "$path"
echo "$path" | sed -n 's/\.sql\.j2$/.gen.sql/'
else
echo "'$path' does not exists" >&2
fi
@ -85,11 +85,13 @@ function filter_exists
function fuzz
{
/generate-test-j2.py --path ch/tests/queries/0_stateless
# Obtain the list of newly added tests. They will be fuzzed in more extreme way than other tests.
# Don't overwrite the NEW_TESTS_OPT so that it can be set from the environment.
NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\)$!ch/\1!p' ci-changed-files.txt | sort -R)"
NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\(\.j2\)\?\)$!ch/\1!p' ci-changed-files.txt | sort -R)"
# ci-changed-files.txt contains also files that has been deleted/renamed, filter them out.
NEW_TESTS="$(filter_exists $NEW_TESTS)"
NEW_TESTS="$(filter_exists_and_template $NEW_TESTS)"
if [[ -n "$NEW_TESTS" ]]
then
NEW_TESTS_OPT="${NEW_TESTS_OPT:---interleave-queries-file ${NEW_TESTS}}"

View File

@ -24,6 +24,8 @@ RUN apt-get update -y \
python3-pip \
qemu-user-static \
sudo \
# golang version 1.13 on Ubuntu 20 is enough for tests
golang \
telnet \
tree \
unixodbc \

View File

@ -5,6 +5,86 @@ toc_title: Third-Party Libraries Used
# Third-Party Libraries Used {#third-party-libraries-used}
The list of third-party libraries:
| Library name | License type |
|:-|:-|
| abseil-cpp | [Apache](https://github.com/ClickHouse-Extras/abseil-cpp/blob/4f3b686f86c3ebaba7e4e926e62a79cb1c659a54/LICENSE) |
| AMQP-CPP | [Apache](https://github.com/ClickHouse-Extras/AMQP-CPP/blob/1a6c51f4ac51ac56610fa95081bd2f349911375a/LICENSE) |
| arrow | [Apache](https://github.com/ClickHouse-Extras/arrow/blob/078e21bad344747b7656ef2d7a4f7410a0a303eb/LICENSE.txt) |
| avro | [Apache](https://github.com/ClickHouse-Extras/avro/blob/e43c46e87fd32eafdc09471e95344555454c5ef8/LICENSE.txt) |
| aws | [Apache](https://github.com/ClickHouse-Extras/aws-sdk-cpp/blob/7d48b2c8193679cc4516e5bd68ae4a64b94dae7d/LICENSE.txt) |
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |
| brotli | [MIT](https://github.com/google/brotli/blob/63be8a99401992075c23e99f7c84de1c653e39e2/LICENSE) |
| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/a00ccd91b3746ef2ab51d40fe3265829949d1ace/LICENSE) |
| cassandra | [Apache](https://github.com/ClickHouse-Extras/cpp-driver/blob/eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1/LICENSE.txt) |
| cctz | [Apache](https://github.com/ClickHouse-Extras/cctz/blob/c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8/LICENSE.txt) |
| cityhash102 | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/cityhash102/COPYING) |
| cppkafka | [BSD 2-clause](https://github.com/mfontanini/cppkafka/blob/5a119f689f8a4d90d10a9635e7ee2bee5c127de1/LICENSE) |
| croaring | [Apache](https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/LICENSE) |
| curl | [Apache](https://github.com/curl/curl/blob/3b8bbbbd1609c638a3d3d0acb148a33dedb67be3/docs/LICENSE-MIXING.md) |
| cyrus-sasl | [BSD 2-clause](https://github.com/ClickHouse-Extras/cyrus-sasl/blob/e6466edfd638cc5073debe941c53345b18a09512/COPYING) |
| double-conversion | [BSD 3-clause](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) |
| dragonbox | [Apache](https://github.com/ClickHouse-Extras/dragonbox/blob/923705af6fd953aa948fc175f6020b15f7359838/LICENSE-Apache2-LLVM) |
| fast_float | [Apache](https://github.com/fastfloat/fast_float/blob/7eae925b51fd0f570ccd5c880c12e3e27a23b86f/LICENSE) |
| fastops | [MIT](https://github.com/ClickHouse-Extras/fastops/blob/88752a5e03cf34639a4a37a4b41d8b463fffd2b5/LICENSE) |
| flatbuffers | [Apache](https://github.com/ClickHouse-Extras/flatbuffers/blob/eb3f827948241ce0e701516f16cd67324802bce9/LICENSE.txt) |
| fmtlib | [Unknown](https://github.com/fmtlib/fmt/blob/c108ee1d590089ccf642fc85652b845924067af2/LICENSE.rst) |
| gcem | [Apache](https://github.com/kthohr/gcem/blob/8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00/LICENSE) |
| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) |
| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) |
| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) |
| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) |
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) |
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) |
| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) |
| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) |
| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) |
| libcpuid | [BSD 2-clause](https://github.com/ClickHouse-Extras/libcpuid/blob/8db3b8d2d32d22437f063ce692a1b9bb15e42d18/COPYING) |
| libcxx | [Apache](https://github.com/ClickHouse-Extras/libcxx/blob/2fa892f69acbaa40f8a18c6484854a6183a34482/LICENSE.TXT) |
| libcxxabi | [Apache](https://github.com/ClickHouse-Extras/libcxxabi/blob/df8f1e727dbc9e2bedf2282096fa189dc3fe0076/LICENSE.TXT) |
| libdivide | [zLib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libfarmhash | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libfarmhash/COPYING) |
| libgsasl | [LGPL](https://github.com/ClickHouse-Extras/libgsasl/blob/383ee28e82f69fa16ed43b48bd9c8ee5b313ab84/LICENSE) |
| libhdfs3 | [Apache](https://github.com/ClickHouse-Extras/libhdfs3/blob/095b9d48b400abb72d967cb0539af13b1e3d90cf/LICENSE.txt) |
| libmetrohash | [Apache](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) |
| libpq | [Unknown](https://github.com/ClickHouse-Extras/libpq/blob/e071ea570f8985aa00e34f5b9d50a3cfe666327e/COPYRIGHT) |
| libpqxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/libpqxx/blob/357608d11b7a1961c3fb7db2ef9a5dbb2e87da77/COPYING) |
| librdkafka | [MIT](https://github.com/ClickHouse-Extras/librdkafka/blob/b8554f1682062c85ba519eb54ef2f90e02b812cb/LICENSE.murmur2) |
| libunwind | [Apache](https://github.com/ClickHouse-Extras/libunwind/blob/6b816d2fba3991f8fd6aaec17d92f68947eab667/LICENSE.TXT) |
| libuv | [BSD](https://github.com/ClickHouse-Extras/libuv/blob/e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab/LICENSE) |
| llvm | [Apache](https://github.com/ClickHouse-Extras/llvm/blob/e5751459412bce1391fb7a2e9bbc01e131bf72f1/llvm/LICENSE.TXT) |
| lz4 | [BSD](https://github.com/lz4/lz4/blob/f39b79fb02962a1cd880bbdecb6dffba4f754a11/LICENSE) |
| mariadb-connector-c | [LGPL](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/5f4034a3a6376416504f17186c55fe401c6d8e5e/COPYING.LIB) |
| miniselect | [Boost](https://github.com/danlark1/miniselect/blob/be0af6bd0b6eb044d1acc4f754b229972d99903a/LICENSE_1_0.txt) |
| msgpack-c | [Boost](https://github.com/msgpack/msgpack-c/blob/46684265d50b5d1b062d4c5c428ba08462844b1d/LICENSE_1_0.txt) |
| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) |
| NuRaft | [Apache](https://github.com/ClickHouse-Extras/NuRaft/blob/7ecb16844af6a9c283ad432d85ecc2e7d1544676/LICENSE) |
| openldap | [Unknown](https://github.com/ClickHouse-Extras/openldap/blob/0208811b6043ca06fda8631a5e473df1ec515ccb/LICENSE) |
| orc | [Apache](https://github.com/ClickHouse-Extras/orc/blob/0a936f6bbdb9303308973073f8623b5a8d82eae1/LICENSE) |
| poco | [Boost](https://github.com/ClickHouse-Extras/poco/blob/7351c4691b5d401f59e3959adfc5b4fa263b32da/LICENSE) |
| protobuf | [BSD 3-clause](https://github.com/ClickHouse-Extras/protobuf/blob/75601841d172c73ae6bf4ce8121f42b875cdbabd/LICENSE) |
| rapidjson | [MIT](https://github.com/ClickHouse-Extras/rapidjson/blob/c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa/bin/jsonschema/LICENSE) |
| re2 | [BSD 3-clause](https://github.com/google/re2/blob/13ebb377c6ad763ca61d12dd6f88b1126bd0b911/LICENSE) |
| replxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/replxx/blob/c81be6c68b146f15f2096b7ef80e3f21fe27004c/LICENSE.md) |
| rocksdb | [BSD 3-clause](https://github.com/ClickHouse-Extras/rocksdb/blob/b6480c69bf3ab6e298e0d019a07fd4f69029b26a/LICENSE.leveldb) |
| s2geometry | [Apache](https://github.com/ClickHouse-Extras/s2geometry/blob/20ea540d81f4575a3fc0aea585aac611bcd03ede/LICENSE) |
| sentry-native | [MIT](https://github.com/ClickHouse-Extras/sentry-native/blob/94644e92f0a3ff14bd35ed902a8622a2d15f7be4/LICENSE) |
| simdjson | [Apache](https://github.com/simdjson/simdjson/blob/8df32cea3359cb30120795da6020b3b73da01d38/LICENSE) |
| snappy | [Public Domain](https://github.com/google/snappy/blob/3f194acb57e0487531c96b97af61dcbd025a78a3/COPYING) |
| sparsehash-c11 | [BSD 3-clause](https://github.com/sparsehash/sparsehash-c11/blob/cf0bffaa456f23bc4174462a789b90f8b6f5f42f/LICENSE) |
| stats | [Apache](https://github.com/kthohr/stats/blob/b6dd459c10a88c7ea04693c007e9e35820c5d9ad/LICENSE) |
| thrift | [Apache](https://github.com/apache/thrift/blob/010ccf0a0c7023fea0f6bf4e4078ebdff7e61982/LICENSE) |
| unixodbc | [LGPL](https://github.com/ClickHouse-Extras/UnixODBC/blob/b0ad30f7f6289c12b76f04bfb9d466374bb32168/COPYING) |
| xz | [Public Domain](https://github.com/xz-mirror/xz/blob/869b9d1b4edd6df07f819d360d306251f8147353/COPYING) |
| zlib-ng | [zLib](https://github.com/ClickHouse-Extras/zlib-ng/blob/6a5e93b9007782115f7f7e5235dedc81c4f1facb/LICENSE.md) |
| zstd | [BSD](https://github.com/facebook/zstd/blob/a488ba114ec17ea1054b9057c26a046fc122b3b6/LICENSE) |
The list of third-party libraries can be obtained by the following query:
``` sql
@ -13,84 +93,6 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li
[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)
| library_name | license_type | license_path |
|:-|:-|:-|
| abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE |
| AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE |
| arrow | Apache | /contrib/arrow/LICENSE.txt |
| avro | Apache | /contrib/avro/LICENSE.txt |
| aws | Apache | /contrib/aws/LICENSE.txt |
| aws-c-common | Apache | /contrib/aws-c-common/LICENSE |
| aws-c-event-stream | Apache | /contrib/aws-c-event-stream/LICENSE |
| aws-checksums | Apache | /contrib/aws-checksums/LICENSE |
| base64 | BSD 2-clause | /contrib/base64/LICENSE |
| boost | Boost | /contrib/boost/LICENSE_1_0.txt |
| boringssl | BSD | /contrib/boringssl/LICENSE |
| brotli | MIT | /contrib/brotli/LICENSE |
| capnproto | MIT | /contrib/capnproto/LICENSE |
| cassandra | Apache | /contrib/cassandra/LICENSE.txt |
| cctz | Apache | /contrib/cctz/LICENSE.txt |
| cityhash102 | MIT | /contrib/cityhash102/COPYING |
| cppkafka | BSD 2-clause | /contrib/cppkafka/LICENSE |
| croaring | Apache | /contrib/croaring/LICENSE |
| curl | Apache | /contrib/curl/docs/LICENSE-MIXING.md |
| cyrus-sasl | BSD 2-clause | /contrib/cyrus-sasl/COPYING |
| double-conversion | BSD 3-clause | /contrib/double-conversion/LICENSE |
| dragonbox | Apache | /contrib/dragonbox/LICENSE-Apache2-LLVM |
| fast_float | Apache | /contrib/fast_float/LICENSE |
| fastops | MIT | /contrib/fastops/LICENSE |
| flatbuffers | Apache | /contrib/flatbuffers/LICENSE.txt |
| fmtlib | Unknown | /contrib/fmtlib/LICENSE.rst |
| gcem | Apache | /contrib/gcem/LICENSE |
| googletest | BSD 3-clause | /contrib/googletest/LICENSE |
| grpc | Apache | /contrib/grpc/LICENSE |
| h3 | Apache | /contrib/h3/LICENSE |
| hyperscan | Boost | /contrib/hyperscan/LICENSE |
| icu | Public Domain | /contrib/icu/icu4c/LICENSE |
| icudata | Public Domain | /contrib/icudata/LICENSE |
| jemalloc | BSD 2-clause | /contrib/jemalloc/COPYING |
| krb5 | MIT | /contrib/krb5/src/lib/gssapi/LICENSE |
| libc-headers | LGPL | /contrib/libc-headers/LICENSE |
| libcpuid | BSD 2-clause | /contrib/libcpuid/COPYING |
| libcxx | Apache | /contrib/libcxx/LICENSE.TXT |
| libcxxabi | Apache | /contrib/libcxxabi/LICENSE.TXT |
| libdivide | zLib | /contrib/libdivide/LICENSE.txt |
| libfarmhash | MIT | /contrib/libfarmhash/COPYING |
| libgsasl | LGPL | /contrib/libgsasl/LICENSE |
| libhdfs3 | Apache | /contrib/libhdfs3/LICENSE.txt |
| libmetrohash | Apache | /contrib/libmetrohash/LICENSE |
| libpq | Unknown | /contrib/libpq/COPYRIGHT |
| libpqxx | BSD 3-clause | /contrib/libpqxx/COPYING |
| librdkafka | MIT | /contrib/librdkafka/LICENSE.murmur2 |
| libunwind | Apache | /contrib/libunwind/LICENSE.TXT |
| libuv | BSD | /contrib/libuv/LICENSE |
| llvm | Apache | /contrib/llvm/llvm/LICENSE.TXT |
| lz4 | BSD | /contrib/lz4/LICENSE |
| mariadb-connector-c | LGPL | /contrib/mariadb-connector-c/COPYING.LIB |
| miniselect | Boost | /contrib/miniselect/LICENSE_1_0.txt |
| msgpack-c | Boost | /contrib/msgpack-c/LICENSE_1_0.txt |
| murmurhash | Public Domain | /contrib/murmurhash/LICENSE |
| NuRaft | Apache | /contrib/NuRaft/LICENSE |
| openldap | Unknown | /contrib/openldap/LICENSE |
| orc | Apache | /contrib/orc/LICENSE |
| poco | Boost | /contrib/poco/LICENSE |
| protobuf | BSD 3-clause | /contrib/protobuf/LICENSE |
| rapidjson | MIT | /contrib/rapidjson/bin/jsonschema/LICENSE |
| re2 | BSD 3-clause | /contrib/re2/LICENSE |
| replxx | BSD 3-clause | /contrib/replxx/LICENSE.md |
| rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb |
| s2geometry | Apache | /contrib/s2geometry/LICENSE |
| sentry-native | MIT | /contrib/sentry-native/LICENSE |
| simdjson | Apache | /contrib/simdjson/LICENSE |
| snappy | Public Domain | /contrib/snappy/COPYING |
| sparsehash-c11 | BSD 3-clause | /contrib/sparsehash-c11/LICENSE |
| stats | Apache | /contrib/stats/LICENSE |
| thrift | Apache | /contrib/thrift/LICENSE |
| unixodbc | LGPL | /contrib/unixodbc/COPYING |
| xz | Public Domain | /contrib/xz/COPYING |
| zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md |
| zstd | BSD | /contrib/zstd/LICENSE |
## Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries}
1. All external third-party code should reside in the dedicated directories under `contrib` directory of ClickHouse repo. Prefer Git submodules, when available.

View File

@ -34,6 +34,7 @@ The table structure can differ from the original PostgreSQL table structure:
- `user` — PostgreSQL user.
- `password` — User password.
- `schema` — Non-default table schema. Optional.
- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient.
## Implementation Details {#implementation-details}

View File

@ -390,20 +390,27 @@ Functions with a constant argument that is less than ngram size cant be used
- `s != 1`
- `NOT startsWith(s, 'test')`
### Projections {#projections}
Projections are like materialized views but defined in part-level. It provides consistency guarantees along with automatic usage in queries.
## Projections {#projections}
Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined in part-level. It provides consistency guarantees along with automatic usage in queries.
#### Query {#projection-query}
A projection query is what defines a projection. It has the following grammar:
Projections are an experimental feature. To enable them you must set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
`SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]`
Projections are not supported in the `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.
It implicitly selects data from the parent table.
### Projection Query {#projection-query}
A projection query is what defines a projection. It implicitly selects data from the parent table.
**Syntax**
#### Storage {#projection-storage}
Projections are stored inside the part directory. It's similar to an index but contains a subdirectory that stores an anonymous MergeTree table's part. The table is induced by the definition query of the projection. If there is a GROUP BY clause, the underlying storage engine becomes AggregatedMergeTree, and all aggregate functions are converted to AggregateFunction. If there is an ORDER BY clause, the MergeTree table will use it as its primary key expression. During the merge process, the projection part will be merged via its storage's merge routine. The checksum of the parent table's part will combine the projection's part. Other maintenance jobs are similar to skip indices.
```sql
SELECT <column list expr> [GROUP BY] <group keys expr> [ORDER BY] <expr>
```
#### Query Analysis {#projection-query-analysis}
Projections can be modified or dropped with the [ALTER](../../../sql-reference/statements/alter/projection.md) statement.
### Projection Storage {#projection-storage}
Projections are stored inside the part directory. It's similar to an index but contains a subdirectory that stores an anonymous `MergeTree` table's part. The table is induced by the definition query of the projection. If there is a `GROUP BY` clause, the underlying storage engine becomes [AggregatingMergeTree](aggregatingmergetree.md), and all aggregate functions are converted to `AggregateFunction`. If there is an `ORDER BY` clause, the `MergeTree` table uses it as its primary key expression. During the merge process the projection part is merged via its storage's merge routine. The checksum of the parent table's part is combined with the projection's part. Other maintenance jobs are similar to skip indices.
### Query Analysis {#projection-query-analysis}
1. Check if the projection can be used to answer the given query, that is, it generates the same answer as querying the base table.
2. Select the best feasible match, which contains the least granules to read.
3. The query pipeline which uses projections will be different from the one that uses the original parts. If the projection is absent in some parts, we can add the pipeline to "project" it on the fly.

View File

@ -141,7 +141,7 @@ Since version 20.5, `clickhouse-client` has automatic syntax highlighting (alway
Example of a config file:
``` xml
```xml
<config>
<user>username</user>
<password>password</password>
@ -149,4 +149,30 @@ Example of a config file:
</config>
```
[Original article](https://clickhouse.tech/docs/en/interfaces/cli/) <!--hide-->
### Query ID Format {#query-id-format}
In interactive mode `clickhouse-client` shows query ID for every query. By default, the ID is formatted like this:
```sql
Query id: 927f137d-00f1-4175-8914-0dd066365e96
```
A custom format may be specified in a configuration file inside a `query_id_formats` tag. `{query_id}` placeholder in the format string is replaced with the ID of a query. Several format strings are allowed inside the tag.
This feature can be used to generate URLs to facilitate profiling of queries.
**Example**
```xml
<config>
<query_id_formats>
<speedscope>http://speedscope-host/#profileURL=qp%3Fid%3D{query_id}</speedscope>
</query_id_formats>
</config>
```
If the configuration above is applied, the ID of a query is shown in the following format:
``` text
speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d
```

View File

@ -7,7 +7,7 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note, that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have main configs as `config.xml` and `users.xml` and write additional files in `config.d` and `users.d` directories in `.yaml`.
All the configuration files should be in XML or YAML formats. All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
## Override {#override}
@ -32,7 +32,7 @@ You can also declare attributes as coming from environment variables by using `f
## Substitution {#substitution}
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
If you want to replace an entire element with a substitution use `include` as element name.

View File

@ -3435,3 +3435,25 @@ Possible values:
- 1 — The table is automatically updated in the background, when schema changes are detected.
Default value: `0`.
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries.
Possible values:
- 0 — Projection optimization disabled.
- 1 — Projection optimization enabled.
Default value: `0`.
## force_optimize_projection {#force-optimize-projection}
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Possible values:
- 0 — Projection optimization is not obligatory.
- 1 — Projection optimization is obligatory.
Default value: `0`.

View File

@ -154,7 +154,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM
└──────────────────────────────────────────────────────────────────────────────────────────┘
```
In this case, the function couldnt find the event chain matching the pattern, because the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern.
In this case, the function couldnt find the event chain matching the pattern, because the event for number 3 occurred between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern.
``` sql
SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t

View File

@ -9,9 +9,9 @@ A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). A
The date value is stored without the time zone.
## Examples {#examples}
**Example**
**1.** Creating a table with a `DateTime`-type column and inserting data into it:
Creating a table with a `Date`-type column and inserting data into it:
``` sql
CREATE TABLE dt
@ -23,10 +23,7 @@ ENGINE = TinyLog;
```
``` sql
INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
```
``` sql
INSERT INTO dt VALUES (1546300800, 1), ('2019-01-01', 2);
SELECT * FROM dt;
```
@ -37,11 +34,8 @@ SELECT * FROM dt;
└────────────┴──────────┘
```
## See Also {#see-also}
**See Also**
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
[Original article](https://clickhouse.tech/docs/en/data_types/date/) <!--hide-->

View File

@ -0,0 +1,40 @@
---
toc_priority: 48
toc_title: Date32
---
# Date32 {#data_type-datetime32}
A date. Supports the date range same with [Datetime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1925-01-01. Allows storing values till 2283-11-11.
**Examples**
Creating a table with a `Date32`-type column and inserting data into it:
``` sql
CREATE TABLE new
(
`timestamp` Date32,
`event_id` UInt8
)
ENGINE = TinyLog;
```
``` sql
INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
SELECT * FROM new;
```
``` text
┌──timestamp─┬─event_id─┐
│ 2100-01-01 │ 1 │
│ 2100-01-01 │ 2 │
└────────────┴──────────┘
```
**See Also**
- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32)
- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero)
- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null)

View File

@ -17,7 +17,7 @@ DateTime64(precision, [timezone])
Internally, stores data as a number of ticks since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (2020-01-01 05:00:01.000). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md).
Supported range from January 1, 1925 till December 31, 2283.
Supported range from January 1, 1925 till November 11, 2283.
## Examples {#examples}

View File

@ -2236,3 +2236,74 @@ defaultRoles()
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## queryID {#query-id}
Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.
In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see example).
**Syntax**
``` sql
queryID()
```
**Returned value**
- The ID of the current query.
Type: [String](../../sql-reference/data-types/string.md)
**Example**
Query:
``` sql
CREATE TABLE tmp (str String) ENGINE = Log;
INSERT INTO tmp (*) VALUES ('a');
SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID());
```
Result:
``` text
┌─count()─┐
│ 3 │
└─────────┘
```
## initialQueryID {#initial-query-id}
Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`.
In contrast to [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example).
**Syntax**
``` sql
initialQueryID()
```
**Returned value**
- The ID of the initial current query.
Type: [String](../../sql-reference/data-types/string.md)
**Example**
Query:
``` sql
CREATE TABLE tmp (str String) ENGINE = Log;
INSERT INTO tmp (*) VALUES ('a');
SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID());
```
Result:
``` text
┌─count()─┐
│ 1 │
└─────────┘
```

View File

@ -152,6 +152,104 @@ Alias: `DATE`.
## toDateTimeOrNull {#todatetimeornull}
## toDate32 {#todate32}
Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
**Syntax**
``` sql
toDate32(expr)
```
**Arguments**
- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md).
**Returned value**
- A calendar date.
Type: [Date32](../../sql-reference/data-types/date32.md).
**Example**
1. The value is within the range:
``` sql
SELECT toDate32('1955-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1925-01-01'))─┐
│ 1955-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
2. The value is outside the range:
``` sql
SELECT toDate32('1924-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1925-01-01'))─┐
│ 1925-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
3. With `Date`-type argument:
``` sql
SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐
│ 1970-01-01 │ Date32 │
└────────────┴────────────────────────────────────────────┘
```
## toDate32OrZero {#todate32-or-zero}
The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if invalid argument is received.
**Example**
Query:
``` sql
SELECT toDate32OrZero('1924-01-01'), toDate32OrZero('');
```
Result:
``` text
┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐
│ 1925-01-01 │ 1925-01-01 │
└──────────────────────────────┴────────────────────┘
```
## toDate32OrNull {#todate32-or-null}
The same as [toDate32](#todate32) but returns `NULL` if invalid argument is received.
**Example**
Query:
``` sql
SELECT toDate32OrNull('1955-01-01'), toDate32OrNull('');
```
Result:
``` text
┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐
│ 1955-01-01 │ ᴺᵁᴸᴸ │
└──────────────────────────────┴────────────────────┘
```
## toDecimal(32\|64\|128\|256) {#todecimal3264128256}
Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.

View File

@ -5,7 +5,7 @@ toc_title: PROJECTION
# Manipulating Projections {#manipulations-with-projections}
The following operations are available:
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
@ -15,7 +15,7 @@ The following operations are available:
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
The commands ADD, DROP and CLEAR are lightweight in a sense that they only change metadata or remove files.
The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing projections metadata via ZooKeeper.

View File

@ -4,40 +4,95 @@ toc_title: "Используемые сторонние библиотеки"
---
# Используемые сторонние библиотеки {#ispolzuemye-storonnie-biblioteki}
# Используемые сторонние библиотеки {#third-party-libraries-used}
| Библиотека | Лицензия |
|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------|
| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) |
| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) |
| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) |
| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) |
| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) |
| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) |
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) |
| libmetrohash | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) |
| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) |
| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) |
| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) |
| libwidechar_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) |
| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) |
| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) |
| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) |
| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) |
| pdqsort | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/pdqsort/license.txt) |
| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) |
| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) |
| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) |
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |
Список сторонних библиотек:
| Библиотека | Тип лицензии |
|:-|:-|
| abseil-cpp | [Apache](https://github.com/ClickHouse-Extras/abseil-cpp/blob/4f3b686f86c3ebaba7e4e926e62a79cb1c659a54/LICENSE) |
| AMQP-CPP | [Apache](https://github.com/ClickHouse-Extras/AMQP-CPP/blob/1a6c51f4ac51ac56610fa95081bd2f349911375a/LICENSE) |
| arrow | [Apache](https://github.com/ClickHouse-Extras/arrow/blob/078e21bad344747b7656ef2d7a4f7410a0a303eb/LICENSE.txt) |
| avro | [Apache](https://github.com/ClickHouse-Extras/avro/blob/e43c46e87fd32eafdc09471e95344555454c5ef8/LICENSE.txt) |
| aws | [Apache](https://github.com/ClickHouse-Extras/aws-sdk-cpp/blob/7d48b2c8193679cc4516e5bd68ae4a64b94dae7d/LICENSE.txt) |
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |
| brotli | [MIT](https://github.com/google/brotli/blob/63be8a99401992075c23e99f7c84de1c653e39e2/LICENSE) |
| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/a00ccd91b3746ef2ab51d40fe3265829949d1ace/LICENSE) |
| cassandra | [Apache](https://github.com/ClickHouse-Extras/cpp-driver/blob/eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1/LICENSE.txt) |
| cctz | [Apache](https://github.com/ClickHouse-Extras/cctz/blob/c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8/LICENSE.txt) |
| cityhash102 | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/cityhash102/COPYING) |
| cppkafka | [BSD 2-clause](https://github.com/mfontanini/cppkafka/blob/5a119f689f8a4d90d10a9635e7ee2bee5c127de1/LICENSE) |
| croaring | [Apache](https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/LICENSE) |
| curl | [Apache](https://github.com/curl/curl/blob/3b8bbbbd1609c638a3d3d0acb148a33dedb67be3/docs/LICENSE-MIXING.md) |
| cyrus-sasl | [BSD 2-clause](https://github.com/ClickHouse-Extras/cyrus-sasl/blob/e6466edfd638cc5073debe941c53345b18a09512/COPYING) |
| double-conversion | [BSD 3-clause](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) |
| dragonbox | [Apache](https://github.com/ClickHouse-Extras/dragonbox/blob/923705af6fd953aa948fc175f6020b15f7359838/LICENSE-Apache2-LLVM) |
| fast_float | [Apache](https://github.com/fastfloat/fast_float/blob/7eae925b51fd0f570ccd5c880c12e3e27a23b86f/LICENSE) |
| fastops | [MIT](https://github.com/ClickHouse-Extras/fastops/blob/88752a5e03cf34639a4a37a4b41d8b463fffd2b5/LICENSE) |
| flatbuffers | [Apache](https://github.com/ClickHouse-Extras/flatbuffers/blob/eb3f827948241ce0e701516f16cd67324802bce9/LICENSE.txt) |
| fmtlib | [Unknown](https://github.com/fmtlib/fmt/blob/c108ee1d590089ccf642fc85652b845924067af2/LICENSE.rst) |
| gcem | [Apache](https://github.com/kthohr/gcem/blob/8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00/LICENSE) |
| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) |
| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) |
| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) |
| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) |
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) |
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) |
| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) |
| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) |
| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) |
| libcpuid | [BSD 2-clause](https://github.com/ClickHouse-Extras/libcpuid/blob/8db3b8d2d32d22437f063ce692a1b9bb15e42d18/COPYING) |
| libcxx | [Apache](https://github.com/ClickHouse-Extras/libcxx/blob/2fa892f69acbaa40f8a18c6484854a6183a34482/LICENSE.TXT) |
| libcxxabi | [Apache](https://github.com/ClickHouse-Extras/libcxxabi/blob/df8f1e727dbc9e2bedf2282096fa189dc3fe0076/LICENSE.TXT) |
| libdivide | [zLib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libfarmhash | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libfarmhash/COPYING) |
| libgsasl | [LGPL](https://github.com/ClickHouse-Extras/libgsasl/blob/383ee28e82f69fa16ed43b48bd9c8ee5b313ab84/LICENSE) |
| libhdfs3 | [Apache](https://github.com/ClickHouse-Extras/libhdfs3/blob/095b9d48b400abb72d967cb0539af13b1e3d90cf/LICENSE.txt) |
| libmetrohash | [Apache](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) |
| libpq | [Unknown](https://github.com/ClickHouse-Extras/libpq/blob/e071ea570f8985aa00e34f5b9d50a3cfe666327e/COPYRIGHT) |
| libpqxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/libpqxx/blob/357608d11b7a1961c3fb7db2ef9a5dbb2e87da77/COPYING) |
| librdkafka | [MIT](https://github.com/ClickHouse-Extras/librdkafka/blob/b8554f1682062c85ba519eb54ef2f90e02b812cb/LICENSE.murmur2) |
| libunwind | [Apache](https://github.com/ClickHouse-Extras/libunwind/blob/6b816d2fba3991f8fd6aaec17d92f68947eab667/LICENSE.TXT) |
| libuv | [BSD](https://github.com/ClickHouse-Extras/libuv/blob/e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab/LICENSE) |
| llvm | [Apache](https://github.com/ClickHouse-Extras/llvm/blob/e5751459412bce1391fb7a2e9bbc01e131bf72f1/llvm/LICENSE.TXT) |
| lz4 | [BSD](https://github.com/lz4/lz4/blob/f39b79fb02962a1cd880bbdecb6dffba4f754a11/LICENSE) |
| mariadb-connector-c | [LGPL](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/5f4034a3a6376416504f17186c55fe401c6d8e5e/COPYING.LIB) |
| miniselect | [Boost](https://github.com/danlark1/miniselect/blob/be0af6bd0b6eb044d1acc4f754b229972d99903a/LICENSE_1_0.txt) |
| msgpack-c | [Boost](https://github.com/msgpack/msgpack-c/blob/46684265d50b5d1b062d4c5c428ba08462844b1d/LICENSE_1_0.txt) |
| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) |
| NuRaft | [Apache](https://github.com/ClickHouse-Extras/NuRaft/blob/7ecb16844af6a9c283ad432d85ecc2e7d1544676/LICENSE) |
| openldap | [Unknown](https://github.com/ClickHouse-Extras/openldap/blob/0208811b6043ca06fda8631a5e473df1ec515ccb/LICENSE) |
| orc | [Apache](https://github.com/ClickHouse-Extras/orc/blob/0a936f6bbdb9303308973073f8623b5a8d82eae1/LICENSE) |
| poco | [Boost](https://github.com/ClickHouse-Extras/poco/blob/7351c4691b5d401f59e3959adfc5b4fa263b32da/LICENSE) |
| protobuf | [BSD 3-clause](https://github.com/ClickHouse-Extras/protobuf/blob/75601841d172c73ae6bf4ce8121f42b875cdbabd/LICENSE) |
| rapidjson | [MIT](https://github.com/ClickHouse-Extras/rapidjson/blob/c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa/bin/jsonschema/LICENSE) |
| re2 | [BSD 3-clause](https://github.com/google/re2/blob/13ebb377c6ad763ca61d12dd6f88b1126bd0b911/LICENSE) |
| replxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/replxx/blob/c81be6c68b146f15f2096b7ef80e3f21fe27004c/LICENSE.md) |
| rocksdb | [BSD 3-clause](https://github.com/ClickHouse-Extras/rocksdb/blob/b6480c69bf3ab6e298e0d019a07fd4f69029b26a/LICENSE.leveldb) |
| s2geometry | [Apache](https://github.com/ClickHouse-Extras/s2geometry/blob/20ea540d81f4575a3fc0aea585aac611bcd03ede/LICENSE) |
| sentry-native | [MIT](https://github.com/ClickHouse-Extras/sentry-native/blob/94644e92f0a3ff14bd35ed902a8622a2d15f7be4/LICENSE) |
| simdjson | [Apache](https://github.com/simdjson/simdjson/blob/8df32cea3359cb30120795da6020b3b73da01d38/LICENSE) |
| snappy | [Public Domain](https://github.com/google/snappy/blob/3f194acb57e0487531c96b97af61dcbd025a78a3/COPYING) |
| sparsehash-c11 | [BSD 3-clause](https://github.com/sparsehash/sparsehash-c11/blob/cf0bffaa456f23bc4174462a789b90f8b6f5f42f/LICENSE) |
| stats | [Apache](https://github.com/kthohr/stats/blob/b6dd459c10a88c7ea04693c007e9e35820c5d9ad/LICENSE) |
| thrift | [Apache](https://github.com/apache/thrift/blob/010ccf0a0c7023fea0f6bf4e4078ebdff7e61982/LICENSE) |
| unixodbc | [LGPL](https://github.com/ClickHouse-Extras/UnixODBC/blob/b0ad30f7f6289c12b76f04bfb9d466374bb32168/COPYING) |
| xz | [Public Domain](https://github.com/xz-mirror/xz/blob/869b9d1b4edd6df07f819d360d306251f8147353/COPYING) |
| zlib-ng | [zLib](https://github.com/ClickHouse-Extras/zlib-ng/blob/6a5e93b9007782115f7f7e5235dedc81c4f1facb/LICENSE.md) |
| zstd | [BSD](https://github.com/facebook/zstd/blob/a488ba114ec17ea1054b9057c26a046fc122b3b6/LICENSE) |
Список всех сторонних библиотек можно получить с помощью запроса:
``` sql
SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en';
```
[Пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)
## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений {#adding-third-party-libraries}

View File

@ -377,23 +377,33 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
- `s != 1`
- `NOT startsWith(s, 'test')`
### Проекции {#projections}
Проекции похожи на материализованные представления, но определяются на уровне партов. Это обеспечивает гарантии согласованности наряду с автоматическим использованием в запросах.
## Проекции {#projections}
Проекции похожи на [материализованные представления](../../../sql-reference/statements/create/view.md#materialized), но определяются на уровне кусков данных. Это обеспечивает гарантии согласованности данных наряду с автоматическим использованием в запросах.
#### Запрос {#projection-query}
Запрос проекции — это то, что определяет проекцию. Он имеет следующую грамматику:
Проекции — это экспериментальная возможность. Чтобы включить поддержку проекций, установите настройку [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection ](../../../operations/settings/settings.md#force-optimize-projection).
`SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]`
Проекции не поддерживаются для запросов `SELECT` с модификатором [FINAL](../../../sql-reference/statements/select/from.md#select-from-final).
Он неявно выбирает данные из родительской таблицы.
### Запрос проекции {#projection-query}
Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы.
**Синтаксис**
#### Хранение {#projection-storage}
Проекции хранятся в каталоге парта. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный парт таблицы MergeTree. Таблица создается запросом определения проекции. Если есть конструкция GROUP BY, то базовый механизм хранения становится AggregatedMergeTree, а все агрегатные функции преобразуются в AggregateFunction. Если есть конструкция ORDER BY, таблица MergeTree будет использовать его в качестве выражения первичного ключа. Во время процесса слияния парт проекции будет слит с помощью процедуры слияния ее хранилища. Контрольная сумма парта родительской таблицы будет включать парт проекции. Другие процедуры аналогичны индексам пропуска данных.
```sql
SELECT <column list expr> [GROUP BY] <group keys expr> [ORDER BY] <expr>
```
#### Анализ запросов {#projection-query-analysis}
1. Проверить, можно ли использовать проекцию в данном запросе, то есть, что с ней выходит тот же результат, что и с запросом к базовой таблице.
2. Выбрать наиболее подходящее совпадение, содержащее наименьшее количество гранул для чтения.
3. План запроса, который использует проекции, будет отличаться от того, который использует исходные парты. При отсутствии проекции в некоторых партах можно расширить план, чтобы «проецировать» на лету.
Проекции можно изменить или удалить с помощью запроса [ALTER](../../../sql-reference/statements/alter/projection.md).
### Хранение проекции {#projection-storage}
Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции.
Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`.
Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа.
Во время процесса слияния кусок данных проекции объединяется с помощью процедуры слияния хранилища. Контрольная сумма куска данных родительской таблицы включает кусок данных проекции. Другие процедуры аналогичны индексам пропуска данных.
### Анализ запросов {#projection-query-analysis}
1. Проверьте, можно ли использовать проекцию в данном запросе, то есть, что с ней получается тот же результат, что и с запросом к базовой таблице.
2. Выберите наиболее подходящее совпадение, содержащее наименьшее количество гранул для чтения.
3. План запроса, который использует проекции, отличается от того, который использует исходные куски данных. Если в некоторых кусках проекции отсутствуют, можно расширить план, чтобы «проецировать» на лету.
## Конкурентный доступ к данным {#concurrent-data-access}

View File

@ -26,7 +26,7 @@ Connected to ClickHouse server version 20.13.1 revision 54442.
Клиент может быть использован в интерактивном и не интерактивном (batch) режиме.
Чтобы использовать batch режим, укажите параметр query, или отправьте данные в stdin (проверяется, что stdin - не терминал), или и то, и другое.
Аналогично HTTP интерфейсу, при использовании одновременно параметра query и отправке данных в stdin, запрос составляется из конкатенации параметра query, перевода строки, и данных в stdin. Это удобно для больших INSERT запросов.
Аналогично HTTP интерфейсу, при использовании одновременно параметра query и отправке данных в stdin, запрос составляется из конкатенации параметра query, перевода строки и данных в stdin. Это удобно для больших `INSERT` запросов.
Примеры использования клиента для вставки данных:
@ -41,17 +41,17 @@ _EOF
$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```
В batch режиме в качестве формата данных по умолчанию используется формат TabSeparated. Формат может быть указан в секции FORMAT запроса.
В batch режиме в качестве формата данных по умолчанию используется формат `TabSeparated`. Формат может быть указан в запросе в секции `FORMAT`.
По умолчанию, в batch режиме вы можете выполнить только один запрос. Чтобы выполнить несколько запросов из «скрипта», используйте параметр multiquery. Это работает для всех запросов кроме INSERT. Результаты запросов выводятся подряд без дополнительных разделителей.
Также, при необходимости выполнить много запросов, вы можете запускать clickhouse-client на каждый запрос. Заметим, что запуск программы clickhouse-client может занимать десятки миллисекунд.
По умолчанию в batch режиме вы можете выполнить только один запрос. Чтобы выполнить несколько запросов из «скрипта», используйте параметр `-multiquery`. Это работает для всех запросов кроме `INSERT`. Результаты запросов выводятся подряд без дополнительных разделителей.
Если нужно выполнить много запросов, вы можете запускать clickhouse-client отдельно на каждый запрос. Заметим, что запуск программы clickhouse-client может занимать десятки миллисекунд.
В интерактивном режиме, вы получите командную строку, в которую можно вводить запросы.
В интерактивном режиме вы получаете командную строку, в которую можно вводить запросы.
Если не указано multiline (по умолчанию):
Чтобы выполнить запрос, нажмите Enter. Точка с запятой на конце запроса не обязательна. Чтобы ввести запрос, состоящий из нескольких строк, перед переводом строки, введите символ обратного слеша: `\` - тогда после нажатия Enter, вам предложат ввести следующую строку запроса.
Чтобы выполнить запрос, нажмите Enter. Точка с запятой на конце запроса необязательна. Чтобы ввести запрос, состоящий из нескольких строк, в конце строки поставьте символ обратного слеша `\`, тогда после нажатия Enter вы сможете ввести следующую строку запроса.
Если указано multiline (многострочный режим):
Если указан параметр `--multiline` (многострочный режим):
Чтобы выполнить запрос, завершите его точкой с запятой и нажмите Enter. Если в конце введённой строки не было точки с запятой, то вам предложат ввести следующую строчку запроса.
Исполняется только один запрос, поэтому всё, что введено после точки с запятой, игнорируется.
@ -61,20 +61,20 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
Командная строка сделана на основе readline (и history) (или libedit, или без какой-либо библиотеки, в зависимости от сборки) - то есть, в ней работают привычные сочетания клавиш, а также присутствует история.
История пишется в `~/.clickhouse-client-history`.
По умолчанию, в качестве формата, используется формат PrettyCompact (красивые таблички). Вы можете изменить формат с помощью секции FORMAT запроса, или с помощью указания `\G` на конце запроса, с помощью аргумента командной строки `--format` или `--vertical`, или с помощью конфигурационного файла клиента.
По умолчанию используется формат вывода `PrettyCompact` (он поддерживает красивый вывод таблиц). Вы можете изменить формат вывода результатов запроса следующими способами: с помощью секции `FORMAT` в запросе, указав символ `\G` в конце запроса, используя аргументы командной строки `--format` или `--vertical` или с помощью конфигурационного файла клиента.
Чтобы выйти из клиента, нажмите Ctrl+D, или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй»
Чтобы выйти из клиента, нажмите Ctrl+D или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй».
При выполнении запроса, клиент показывает:
При выполнении запроса клиент показывает:
1. Прогресс выполнение запроса, который обновляется не чаще, чем 10 раз в секунду (по умолчанию). При быстрых запросах, прогресс может не успеть отобразиться.
1. Прогресс выполнение запроса, который обновляется не чаще, чем 10 раз в секунду (по умолчанию). При быстрых запросах прогресс может не успеть отобразиться.
2. Отформатированный запрос после его парсинга - для отладки.
3. Результат в заданном формате.
4. Количество строк результата, прошедшее время, а также среднюю скорость выполнения запроса.
Вы можете прервать длинный запрос, нажав Ctrl+C. При этом вам всё равно придётся чуть-чуть подождать, пока сервер остановит запрос. На некоторых стадиях выполнения, запрос невозможно прервать. Если вы не дождётесь и нажмёте Ctrl+C второй раз, то клиент будет завершён.
Вы можете прервать длинный запрос, нажав Ctrl+C. При этом вам всё равно придётся чуть-чуть подождать, пока сервер остановит запрос. На некоторых стадиях выполнения запрос невозможно прервать. Если вы не дождётесь и нажмёте Ctrl+C второй раз, то клиент будет завершён.
Клиент командной строки позволяет передать внешние данные (внешние временные таблицы) для использования запроса. Подробнее смотрите раздел «Внешние данные для обработки запроса»
Клиент командной строки позволяет передать внешние данные (внешние временные таблицы) для выполнения запроса. Подробнее смотрите раздел «Внешние данные для обработки запроса».
### Запросы с параметрами {#cli-queries-with-parameters}
@ -84,7 +84,7 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}"
```
#### Cинтаксис запроса {#cli-queries-with-parameters-syntax}
#### Синтаксис запроса {#cli-queries-with-parameters-syntax}
Отформатируйте запрос обычным способом. Представьте значения, которые вы хотите передать из параметров приложения в запрос в следующем формате:
@ -155,3 +155,29 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
</config>
```
### Формат ID запроса {#query-id-format}
В интерактивном режиме `clickhouse-client` показывает ID для каждого запроса. По умолчанию ID выводится в таком виде:
```sql
Query id: 927f137d-00f1-4175-8914-0dd066365e96
```
Произвольный формат ID можно задать в конфигурационном файле внутри тега `query_id_formats`. ID подставляется вместо `{query_id}` в строке формата. В теге может быть перечислено несколько строк формата.
Эта возможность может быть полезна для генерации URL, с помощью которых выполняется профилирование запросов.
**Пример**
```xml
<config>
<query_id_formats>
<speedscope>http://speedscope-host/#profileURL=qp%3Fid%3D{query_id}</speedscope>
</query_id_formats>
</config>
```
Если применить приведённую выше конфигурацию, то ID запроса будет выводиться в следующем виде:
``` text
speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d
```

View File

@ -6,19 +6,51 @@ toc_title: "Конфигурационные файлы"
# Конфигурационные файлы {#configuration_files}
Основной конфигурационный файл сервера - `config.xml` или `config.yaml`. Он расположен в директории `/etc/clickhouse-server/`.
ClickHouse поддерживает многофайловое управление конфигурацией. Основной конфигурационный файл сервера `/etc/clickhouse-server/config.xml` или `/etc/clickhouse-server/config.yaml`. Остальные файлы должны находиться в директории `/etc/clickhouse-server/config.d`. Обратите внимание, что конфигурационные файлы могут быть записаны в форматах XML или YAML, но смешение этих форматов в одном файле не поддерживается. Например, можно хранить основные конфигурационные файлы как `config.xml` и `users.xml`, а дополнительные файлы записать в директории `config.d` и `users.d` в формате `.yaml`.
Отдельные настройки могут быть переопределены в файлах `*.xml` и `*.conf`, а также `.yaml` (для файлов в формате YAML) из директории `config.d` рядом с конфигом.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<yandex>`. Для YAML элемент `yandex:` должен отсутствовать, так как парсер вставляет его автоматически.
У элементов этих конфигурационных файлов могут быть указаны атрибуты `replace` или `remove`.
## Переопределение {#override}
Если ни один не указан - объединить содержимое элементов рекурсивно с заменой значений совпадающих детей.
Некоторые настройки, определенные в основном конфигурационном файле, могут быть переопределены в других файлах:
Если указано `replace` - заменить весь элемент на указанный.
- У элементов этих конфигурационных файлов могут быть указаны атрибуты `replace` или `remove`.
- Если ни один атрибут не указан, сервер объединит содержимое элементов рекурсивно, заменяя совпадающие значения дочерних элементов.
- Если указан атрибут `replace`, сервер заменит весь элемент на указанный.
- Если указан атрибут `remove`, сервер удалит элемент.
Если указано `remove` - удалить элемент.
Также возможно указать атрибуты как переменные среды с помощью `from_env="VARIABLE_NAME"`:
Также в конфиге могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from). Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl` отсутствует, то в лог попадает соответствующая запись. Чтобы ClickHouse не писал в лог об отсутствии подстановки, необходимо указать атрибут `optional="true"` (например, настройка [macros](server-configuration-parameters/settings.md)).
```xml
<yandex>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
```
## Подстановки {#substitution}
В конфигурационном файле могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from). Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl`, отсутствует, то делается соответствующая запись в лог. Чтобы ClickHouse фиксировал в логе отсутствие подстановки, необходимо указать атрибут `optional="true"` (например, настройки для [macros](server-configuration-parameters/settings.md#macros)).
Если нужно заменить весь элемент подстановкой, можно использовать `include` как имя элемента.
Пример подстановки XML:
```xml
<yandex>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
<users>
<!-- Replaces `include` element with the subtree found at `/users-in-zookeeper` ZK path. -->
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
```
Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
@ -115,3 +147,9 @@ seq:
<seq attr1="value1" attr2="value2">123</seq>
<seq attr1="value1" attr2="value2">abc</seq>
```
## Детали реализации {#implementation-details}
При старте сервера для каждого конфигурационного файла создаются файлы предобработки `file-preprocessed.xml`. Они содержат все выполненные подстановки и переопределения (эти сведения записываются просто для информации). Если в конфигурационном файле настроены подстановки ZooKeeper, но при старте сервера ZooKeeper не доступен, то сервер загружает конфигурацию из соответствующего файла предобработки.
Сервер отслеживает как изменения в конфигурационных файлах, так и файлы и узы ZooKeeper, которые были использованы при выполнении подстановок и переопределений, и на ходу перезагружает настройки для пользователей и кластеров. Это означает, что можно изменять кластеры, пользователей и их настройки без перезапуска сервера.

View File

@ -3252,3 +3252,25 @@ SETTINGS index_granularity = 8192 │
- 1 — таблица обновляется автоматически в фоновом режиме при обнаружении изменений схемы.
Значение по умолчанию: `0`.
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
Включает или отключает поддержку [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) при обработке запросов `SELECT`.
Возможные значения:
- 0 — Проекции не поддерживаются.
- 1 — Проекции поддерживаются.
Значение по умолчанию: `0`.
## force_optimize_projection {#force-optimize-projection}
Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)).
Возможные значения:
- 0 — Проекции используются опционально.
- 1 — Проекции обязательно используются.
Значение по умолчанию: `0`.

View File

@ -9,9 +9,9 @@ toc_title: Date
Дата хранится без учёта часового пояса.
## Примеры {#examples}
**Пример**
**1.** Создание таблицы и добавление в неё данных:
Создание таблицы и добавление в неё данных:
``` sql
CREATE TABLE dt
@ -24,9 +24,6 @@ ENGINE = TinyLog;
``` sql
INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
```
``` sql
SELECT * FROM dt;
```
@ -37,7 +34,7 @@ SELECT * FROM dt;
└────────────┴──────────┘
```
## Смотрите также {#see-also}
**См. также**
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)

View File

@ -0,0 +1,40 @@
---
toc_priority: 48
toc_title: Date32
---
# Date32 {#data_type-datetime32}
Дата. Поддерживается такой же диапазон дат, как для типа [Datetime64](../../sql-reference/data-types/datetime64.md). Значение хранится в четырех байтах и соответствует числу дней с 1925-01-01 по 2283-11-11.
**Пример**
Создание таблицы со столбцом типа `Date32`и добавление в нее данных:
``` sql
CREATE TABLE new
(
`timestamp` Date32,
`event_id` UInt8
)
ENGINE = TinyLog;
```
``` sql
INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
SELECT * FROM new;
```
``` text
┌──timestamp─┬─event_id─┐
│ 2100-01-01 │ 1 │
│ 2100-01-01 │ 2 │
└────────────┴──────────┘
```
**См. также**
- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32)
- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero)
- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null)

View File

@ -17,7 +17,7 @@ DateTime64(precision, [timezone])
Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (2020-01-01 05:00:01.000). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md).
Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2283 г.
Поддерживаются значения от 1 января 1925 г. и до 11 ноября 2283 г.
## Примеры {#examples}

View File

@ -2185,3 +2185,75 @@ defaultRoles()
- Список ролей по умолчанию.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## queryID {#query-id}
Возвращает идентификатор текущего запроса. Другие параметры запроса могут быть извлечены из системной таблицы [system.query_log](../../operations/system-tables/query_log.md) через `query_id`.
В отличие от [initialQueryID](#initial-query-id), функция `queryID` может возвращать различные значения для разных шардов (см. пример).
**Синтаксис**
``` sql
queryID()
```
**Возвращаемое значение**
- Идентификатор текущего запроса.
Тип: [String](../../sql-reference/data-types/string.md)
**Пример**
Запрос:
``` sql
CREATE TABLE tmp (str String) ENGINE = Log;
INSERT INTO tmp (*) VALUES ('a');
SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID());
```
Результат:
``` text
┌─count()─┐
│ 3 │
└─────────┘
```
## initialQueryID {#initial-query-id}
Возвращает идентификатор родительского запроса. Другие параметры запроса могут быть извлечены из системной таблицы [system.query_log](../../operations/system-tables/query_log.md) через `initial_query_id`.
В отличие от [queryID](#query-id), функция `initialQueryID` возвращает одинаковые значения для разных шардов (см. пример).
**Синтаксис**
``` sql
initialQueryID()
```
**Возвращаемое значение**
- Идентификатор родительского запроса.
Тип: [String](../../sql-reference/data-types/string.md)
**Пример**
Запрос:
``` sql
CREATE TABLE tmp (str String) ENGINE = Log;
INSERT INTO tmp (*) VALUES ('a');
SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID());
```
Результат:
``` text
┌─count()─┐
│ 1 │
└─────────┘
```

View File

@ -152,6 +152,104 @@ Cиноним: `DATE`.
## toDateTimeOrNull {#todatetimeornull}
## toDate32 {#todate32}
Конвертирует аргумент в значение типа [Date32](../../sql-reference/data-types/date32.md). Если значение выходит за границы диапазона, возвращается пограничное значение `Date32`. Если аргумент имеет тип [Date](../../sql-reference/data-types/date.md), учитываются границы типа `Date`.
**Синтаксис**
``` sql
toDate32(value)
```
**Аргументы**
- `value` — Значение даты. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) или [Date](../../sql-reference/data-types/date.md).
**Возвращаемое значение**
- Календарная дата.
Тип: [Date32](../../sql-reference/data-types/date32.md).
**Пример**
1. Значение находится в границах диапазона:
``` sql
SELECT toDate32('1955-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1925-01-01'))─┐
│ 1955-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
2. Значение выходит за границы диапазона:
``` sql
SELECT toDate32('1924-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1925-01-01'))─┐
│ 1925-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
3. С аргументом типа `Date`:
``` sql
SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐
│ 1970-01-01 │ Date32 │
└────────────┴────────────────────────────────────────────┘
```
## toDate32OrZero {#todate32-or-zero}
То же самое, что и [toDate32](#todate32), но возвращает минимальное значение типа [Date32](../../sql-reference/data-types/date32.md), если получен недопустимый аргумент.
**Пример**
Запрос:
``` sql
SELECT toDate32OrZero('1924-01-01'), toDate32OrZero('');
```
Результат:
``` text
┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐
│ 1925-01-01 │ 1925-01-01 │
└──────────────────────────────┴────────────────────┘
```
## toDate32OrNull {#todate32-or-null}
То же самое, что и [toDate32](#todate32), но возвращает `NULL`, если получен недопустимый аргумент.
**Пример**
Запрос:
``` sql
SELECT toDate32OrNull('1955-01-01'), toDate32OrNull('');
```
Результат:
``` text
┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐
│ 1955-01-01 │ ᴺᵁᴸᴸ │
└──────────────────────────────┴────────────────────┘
```
## toDecimal(32\|64\|128\|256) {#todecimal3264128}
Преобразует `value` к типу данных [Decimal](../../sql-reference/functions/type-conversion-functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков.

View File

@ -5,7 +5,7 @@ toc_title: PROJECTION
# Манипуляции с проекциями {#manipulations-with-projections}
Доступны следующие операции:
Доступны следующие операции с [проекциями](../../../engines/table-engines/mergetree-family/mergetree.md#projections):
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` — добавляет описание проекции в метаданные.
@ -15,7 +15,7 @@ toc_title: PROJECTION
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` — удаляет файлы проекции с диска без удаления описания.
Комманды ADD, DROP и CLEAR — легковесны, поскольку они только меняют метаданные или удаляют файлы.
Команды `ADD`, `DROP` и `CLEAR` — легковесны, поскольку они только меняют метаданные или удаляют файлы.
Также команды реплицируются, синхронизируя описания проекций в метаданных с помощью ZooKeeper.

View File

@ -7,7 +7,7 @@ toc_title: Star Schema Benchmark
编译 dbgen:
``` bash
```bash
$ git clone git@github.com:vadimtk/ssb-dbgen.git
$ cd ssb-dbgen
$ make
@ -16,9 +16,9 @@ $ make
开始生成数据:
!!! warning "注意"
使用`-s 100`dbgen将生成6亿行数据(67GB), 如果使用`-s 1000`它会生成60亿行数据(这需要很多时间))
使用`-s 100`dbgen 将生成 6 亿行数据(67GB), 如果使用`-s 1000`它会生成 60 亿行数据(这需要很多时间))
``` bash
```bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
@ -26,9 +26,9 @@ $ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
在ClickHouse中创建数据表
ClickHouse 中创建数据表:
``` sql
```sql
CREATE TABLE customer
(
C_CUSTKEY UInt32,
@ -93,7 +93,7 @@ ENGINE = MergeTree ORDER BY S_SUPPKEY;
写入数据:
``` bash
```bash
$ clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl
$ clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl
$ clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
@ -102,100 +102,267 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
将`star schema`转换为`flat schema`
``` sql
SET max_memory_usage = 20000000000, allow_experimental_multiple_joins_emulation = 1;
```sql
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT l.*, c.*, s.*, p.*
FROM lineorder l
ANY INNER JOIN customer c ON (c.C_CUSTKEY = l.LO_CUSTKEY)
ANY INNER JOIN supplier s ON (s.S_SUPPKEY = l.LO_SUPPKEY)
ANY INNER JOIN part p ON (p.P_PARTKEY = l.LO_PARTKEY);
ALTER TABLE lineorder_flat DROP COLUMN C_CUSTKEY, DROP COLUMN S_SUPPKEY, DROP COLUMN P_PARTKEY;
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
```
运行查询:
Q1.1
``` sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
```sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
```
Q1.2
``` sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;
```sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;
```
Q1.3
``` sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toISOWeek(LO_ORDERDATE) = 6 AND toYear(LO_ORDERDATE) = 1994 AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;
```sql
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toISOWeek(LO_ORDERDATE) = 6 AND toYear(LO_ORDERDATE) = 1994
AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;
```
Q2.1
``` sql
SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
```sql
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
```
Q2.2
``` sql
SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
```sql
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
```
Q2.3
``` sql
SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
```sql
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
```
Q3.1
``` sql
SELECT C_NATION, S_NATION, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION, S_NATION, year ORDER BY year asc, revenue desc;
```sql
SELECT
C_NATION,
S_NATION,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997
GROUP BY
C_NATION,
S_NATION,
year
ORDER BY
year ASC,
revenue DESC;
```
Q3.2
``` sql
SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc;
```sql
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
```
Q3.3
``` sql
SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc;
```sql
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
```
Q3.4
``` sql
SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND toYYYYMM(LO_ORDERDATE) = '199712' GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc;
```sql
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND toYYYYMM(LO_ORDERDATE) = 199712
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
```
Q4.1
``` sql
SELECT toYear(LO_ORDERDATE) AS year, C_NATION, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, C_NATION ORDER BY year, C_NATION;
```sql
SELECT
toYear(LO_ORDERDATE) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
```
Q4.2
``` sql
SELECT toYear(LO_ORDERDATE) AS year, S_NATION, P_CATEGORY, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, S_NATION, P_CATEGORY ORDER BY year, S_NATION, P_CATEGORY;
```sql
SELECT
toYear(LO_ORDERDATE) AS year,
S_NATION,
P_CATEGORY,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY
year,
S_NATION,
P_CATEGORY
ORDER BY
year ASC,
S_NATION ASC,
P_CATEGORY ASC;
```
Q4.3
``` sql
SELECT toYear(LO_ORDERDATE) AS year, S_CITY, P_BRAND, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year, S_CITY, P_BRAND ORDER BY year, S_CITY, P_BRAND;
```sql
SELECT
toYear(LO_ORDERDATE) AS year,
S_CITY,
P_BRAND,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14'
GROUP BY
year,
S_CITY,
P_BRAND
ORDER BY
year ASC,
S_CITY ASC,
P_BRAND ASC;
```
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/star_schema/) <!--hide-->

View File

@ -520,7 +520,7 @@ WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartU
ClickHouse集群是一个同质集群。 设置步骤:
1. 在群集的所有机器上安装ClickHouse服务端
2. 在配置文件中设置集配置
2. 在配置文件中设置集配置
3. 在每个实例上创建本地表
4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)

View File

@ -300,9 +300,9 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
if (config().has("keeper_server.storage_path"))
path = config().getString("keeper_server.storage_path");
else if (config().has("keeper_server.log_storage_path"))
path = config().getString("keeper_server.log_storage_path");
path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path();
else if (config().has("keeper_server.snapshot_storage_path"))
path = config().getString("keeper_server.snapshot_storage_path");
path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path();
else
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
@ -359,7 +359,7 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
auto servers = std::make_shared<std::vector<ProtocolServerAdapter>>();
/// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
global_context->initializeKeeperStorageDispatcher();
global_context->initializeKeeperDispatcher();
for (const auto & listen_host : listen_hosts)
{
/// TCP Keeper
@ -428,7 +428,7 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
else
LOG_INFO(log, "Closed connections to Keeper.");
global_context->shutdownKeeperStorageDispatcher();
global_context->shutdownKeeperDispatcher();
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
server_pool.joinAll();

View File

@ -12,6 +12,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/UserDefinedObjectsLoader.h>
#include <Interpreters/Session.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
@ -270,6 +271,9 @@ try
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
/** Init dummy default DB
* NOTE: We force using isolated default database to avoid conflicts with default database from server environment
* Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory;
@ -287,6 +291,12 @@ try
/// Lock path directory before read
status.emplace(path + "status", StatusFile::write_full_info);
fs::create_directories(fs::path(path) / "user_defined/");
LOG_DEBUG(log, "Loading user defined objects from {}", path);
Poco::File(path + "user_defined/").createDirectories();
UserDefinedObjectsLoader::instance().loadObjects(global_context);
LOG_DEBUG(log, "Loaded user defined objects.");
LOG_DEBUG(log, "Loading metadata from {}", path);
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");

View File

@ -5,40 +5,16 @@
#include <Core/Field.h>
#include <common/LocalDate.h>
#include <common/LocalDateTime.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include "getIdentifierQuote.h"
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Formats/FormatFactory.h>
#include <Parsers/getInsertQuery.h>
namespace DB
{
namespace
{
using ValueType = ExternalResultDescription::ValueType;
std::string getInsertQuery(const std::string & db_name, const std::string & table_name, const ColumnsWithTypeAndName & columns, IdentifierQuotingStyle quoting)
{
ASTInsertQuery query;
query.table_id.database_name = db_name;
query.table_id.table_name = table_name;
query.columns = std::make_shared<ASTExpressionList>(',');
query.children.push_back(query.columns);
for (const auto & column : columns)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
WriteBufferFromOwnString buf;
IAST::FormatSettings settings(buf, true);
settings.always_quote_identifiers = true;
settings.identifier_quoting_style = quoting;
query.IAST::format(settings);
return buf.str();
}
}
ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::ConnectionHolderPtr connection_holder_,
const std::string & remote_database_name_,

View File

@ -13,6 +13,7 @@ namespace DB
class ODBCBlockOutputStream : public IBlockOutputStream
{
using ValueType = ExternalResultDescription::ValueType;
public:
ODBCBlockOutputStream(

View File

@ -53,6 +53,7 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/InterserverCredentials.h>
#include <Interpreters/UserDefinedObjectsLoader.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
@ -774,6 +775,7 @@ if (ThreadFuzzer::instance().isEffective())
{
fs::create_directories(path / "data/");
fs::create_directories(path / "metadata/");
fs::create_directories(path / "user_defined/");
/// Directory with metadata of tables, which was marked as dropped by Atomic database
fs::create_directories(path / "metadata_dropped/");
@ -994,7 +996,7 @@ if (ThreadFuzzer::instance().isEffective())
{
#if USE_NURAFT
/// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
global_context->initializeKeeperStorageDispatcher();
global_context->initializeKeeperDispatcher();
for (const auto & listen_host : listen_hosts)
{
/// TCP Keeper
@ -1077,7 +1079,7 @@ if (ThreadFuzzer::instance().isEffective())
else
LOG_INFO(log, "Closed connections to servers for tables.");
global_context->shutdownKeeperStorageDispatcher();
global_context->shutdownKeeperDispatcher();
}
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
@ -1098,19 +1100,31 @@ if (ThreadFuzzer::instance().isEffective())
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading user defined objects from {}", path_str);
try
{
UserDefinedObjectsLoader::instance().loadObjects(global_context);
}
catch (...)
{
tryLogCurrentException(log, "Caught exception while loading user defined objects");
throw;
}
LOG_DEBUG(log, "Loaded user defined objects");
LOG_INFO(log, "Loading metadata from {}", path_str);
try
{
auto & database_catalog = DatabaseCatalog::instance();
/// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase();
loadMetadataSystem(global_context);
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
auto & database_catalog = DatabaseCatalog::instance();
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);
/// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase();
/// Then, load remaining databases
loadMetadata(global_context, default_database);
database_catalog.loadDatabases();

View File

@ -60,9 +60,6 @@ then
elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
elif [[ "$SANITIZER" == "libfuzzer" ]]; then
VERSION_POSTFIX+="+libfuzzer"
MALLOC_OPTS="-DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0"
else
echo "Unknown value of SANITIZER variable: $SANITIZER"
exit 3

View File

@ -87,6 +87,7 @@ enum class AccessType
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
@ -94,6 +95,7 @@ enum class AccessType
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \

View File

@ -45,7 +45,7 @@ TEST(AccessRights, Union)
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}

View File

@ -301,7 +301,7 @@ size_t ColumnUnique<ColumnType>::getNullValueIndex() const
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
{
if (x.getType() == Field::Types::Null)
if (x.isNull())
return getNullValueIndex();
if (valuesHaveFixedSize())

View File

@ -578,6 +578,12 @@
M(607, BACKUP_ELEMENT_DUPLICATE) \
M(608, CANNOT_RESTORE_TABLE) \
\
M(598, FUNCTION_ALREADY_EXISTS) \
M(599, CANNOT_DROP_SYSTEM_FUNCTION) \
M(600, CANNOT_CREATE_RECURSIVE_FUNCTION) \
M(601, OBJECT_ALREADY_STORED_ON_DISK) \
M(602, OBJECT_WAS_NOT_STORED_ON_DISK) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -64,7 +64,8 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
while (!in.eof())
{
readEscapedStringUntilEOL(line, in);
++in.position();
if (!in.eof())
++in.position();
/// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/')
continue;

View File

@ -74,17 +74,24 @@ void TimerDescriptor::drain() const
}
}
void TimerDescriptor::setRelative(Poco::Timespan timespan) const
void TimerDescriptor::setRelative(uint64_t usec) const
{
static constexpr uint32_t TIMER_PRECISION = 1e6;
itimerspec spec;
spec.it_interval.tv_nsec = 0;
spec.it_interval.tv_sec = 0;
spec.it_value.tv_sec = timespan.totalSeconds();
spec.it_value.tv_nsec = timespan.useconds() * 1000;
spec.it_value.tv_sec = usec / TIMER_PRECISION;
spec.it_value.tv_nsec = (usec % TIMER_PRECISION) * 1'000;
if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr))
throwFromErrno("Cannot set time for timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
}
void TimerDescriptor::setRelative(Poco::Timespan timespan) const
{
setRelative(timespan.totalMicroseconds());
}
}
#endif

View File

@ -24,6 +24,7 @@ public:
void reset() const;
void drain() const;
void setRelative(uint64_t usec) const;
void setRelative(Poco::Timespan timespan) const;
};

View File

@ -253,4 +253,3 @@ CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper d
}

View File

@ -82,8 +82,10 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
}
template <typename T>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
{
const char * output_end = dest + output_size;
if (source_size % sizeof(T) != 0)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot delta decompress, data size {} is not aligned to {}", source_size, sizeof(T));
@ -92,6 +94,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
while (source < source_end)
{
accumulator += unalignedLoad<T>(source);
if (dest + sizeof(accumulator) > output_end)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<T>(dest, accumulator);
source += sizeof(T);
@ -137,6 +141,7 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
UInt32 output_size = uncompressed_size - bytes_to_skip;
if (UInt32(2 + bytes_to_skip) > source_size)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
@ -146,16 +151,16 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
switch (bytes_size)
{
case 1:
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 2:
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 4:
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 8:
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
}
}
@ -209,4 +214,10 @@ void registerCodecDelta(CompressionCodecFactory & factory)
return std::make_shared<CompressionCodecDelta>(delta_bytes_size);
});
}
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size)
{
return std::make_shared<CompressionCodecDelta>(delta_bytes_size);
}
}

View File

@ -353,12 +353,13 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
}
template <typename ValueType>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
{
static_assert(is_unsigned_v<ValueType>, "ValueType must be unsigned.");
using UnsignedDeltaType = ValueType;
const char * source_end = source + source_size;
const char * output_end = dest + output_size;
if (source + sizeof(UInt32) > source_end)
return;
@ -374,6 +375,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
return;
prev_value = unalignedLoad<ValueType>(source);
if (dest + sizeof(prev_value) > output_end)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<ValueType>(dest, prev_value);
source += sizeof(prev_value);
@ -385,6 +388,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
prev_delta = unalignedLoad<UnsignedDeltaType>(source);
prev_value = prev_value + static_cast<ValueType>(prev_delta);
if (dest + sizeof(prev_value) > output_end)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<ValueType>(dest, prev_value);
source += sizeof(prev_delta);
@ -416,6 +421,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
const UnsignedDeltaType delta = double_delta + prev_delta;
const ValueType curr_value = prev_value + delta;
if (dest + sizeof(curr_value) > output_end)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<ValueType>(dest, curr_value);
dest += sizeof(curr_value);
@ -507,6 +514,7 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
UInt32 output_size = uncompressed_size - bytes_to_skip;
if (UInt32(2 + bytes_to_skip) > source_size)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
@ -516,16 +524,16 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
switch (bytes_size)
{
case 1:
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 2:
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 4:
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
case 8:
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size);
break;
}
}
@ -543,4 +551,10 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory)
return std::make_shared<CompressionCodecDoubleDelta>(data_bytes_size);
});
}
CompressionCodecPtr getCompressionCodecDoubleDelta(UInt8 data_bytes_size)
{
return std::make_shared<CompressionCodecDoubleDelta>(data_bytes_size);
}
}

View File

@ -51,7 +51,7 @@ namespace DB
*/
static void setMasterKey(const std::string_view & master_key);
CompressionCodecEncrypted(const std::string_view & cipher);
explicit CompressionCodecEncrypted(const std::string_view & cipher);
uint8_t getMethodByte() const override;
void updateHash(SipHash & hash) const override;
@ -88,7 +88,7 @@ namespace DB
*/
struct KeyHolder : private boost::noncopyable
{
KeyHolder(const std::string_view & master_key);
explicit KeyHolder(const std::string_view & master_key);
~KeyHolder();
std::string keygen_key;
@ -99,6 +99,11 @@ namespace DB
static inline std::optional<KeyHolder> keys;
};
inline CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key)
{
return std::make_shared<CompressionCodecEncrypted>(master_key);
}
}
#endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */

View File

@ -147,4 +147,10 @@ CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_)
setCodecDescription("LZ4HC", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}
CompressionCodecPtr getCompressionCodecLZ4(int level)
{
return std::make_shared<CompressionCodecLZ4HC>(level);
}
}

View File

@ -9,7 +9,7 @@ class CompressionCodecMultiple final : public ICompressionCodec
{
public:
CompressionCodecMultiple() = default; /// Need for CompressionFactory to register codec by method byte.
CompressionCodecMultiple(Codecs codecs_);
explicit CompressionCodecMultiple(Codecs codecs_);
uint8_t getMethodByte() const override;

View File

@ -156,4 +156,9 @@ void registerCodecZSTD(CompressionCodecFactory & factory)
});
}
CompressionCodecPtr getCompressionCodecZSTD(int level)
{
return std::make_shared<CompressionCodecZSTD>(level);
}
}

View File

@ -18,6 +18,8 @@ using Codecs = std::vector<CompressionCodecPtr>;
class IDataType;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size);
/**
* Represents interface for compression codecs like LZ4, ZSTD, etc.
*/
@ -84,6 +86,8 @@ public:
virtual bool isNone() const { return false; }
protected:
/// This is used for fuzz testing
friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size);
/// Return size of compressed data without header
virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; }

View File

@ -450,7 +450,11 @@ bool NO_INLINE decompressImpl(
const unsigned token = *ip++;
length = token >> 4;
if (length == 0x0F)
{
if (unlikely(ip + 1 >= input_end))
return false;
continue_read_length();
}
/// Copy literals.
@ -470,6 +474,20 @@ bool NO_INLINE decompressImpl(
if (unlikely(copy_end > output_end))
return false;
// Due to implementation specifics the copy length is always a multiple of copy_amount
size_t real_length = 0;
static_assert(copy_amount == 8 || copy_amount == 16 || copy_amount == 32);
if constexpr (copy_amount == 8)
real_length = (((length >> 3) + 1) * 8);
else if constexpr (copy_amount == 16)
real_length = (((length >> 4) + 1) * 16);
else if constexpr (copy_amount == 32)
real_length = (((length >> 5) + 1) * 32);
if (unlikely(ip + real_length >= input_end + ADDITIONAL_BYTES_AT_END_OF_BUFFER))
return false;
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (copy_end == output_end)
@ -494,7 +512,11 @@ bool NO_INLINE decompressImpl(
length = token & 0x0F;
if (length == 0x0F)
{
if (unlikely(ip + 1 >= input_end))
return false;
continue_read_length();
}
length += 4;
/// Copy match within block, that produce overlapping pattern. Match may replicate itself.

View File

@ -1,2 +1,20 @@
# Our code has strong cohesion and target associated with `Compression` also depends on `DataTypes`.
# But we can exclude some files which have dependencies in case of
# fuzzer related build (we are interested in fuzzing only particular part of our code).
# So, some symbols will be declared, but not defined. Unfortunately, this trick doesn't work with UBSan.
# If you want really small size of the resulted binary, just link with fuzz_compression and clickhouse_common_io
add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
target_link_libraries (compressed_buffer_fuzzer PRIVATE fuzz_compression clickhouse_common_io ${LIB_FUZZING_ENGINE})
target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp)
target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms lz4 ${LIB_FUZZING_ENGINE})
add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp)
target_link_libraries (delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
add_executable (double_delta_decompress_fuzzer double_delta_decompress_fuzzer.cpp)
target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
add_executable (encrypted_decompress_fuzzer encrypted_decompress_fuzzer.cpp)
target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})

View File

@ -17,6 +17,5 @@ try
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
return 1;
}

View File

@ -0,0 +1,44 @@
#include <iostream>
#include <string>
#include <Compression/ICompressionCodec.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
}
struct AuxiliaryRandomData
{
UInt8 delta_size_bytes;
size_t decompressed_size;
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDelta(p->delta_size_bytes);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}
catch (...)
{
return 1;
}

View File

@ -0,0 +1,44 @@
#include <iostream>
#include <string>
#include <Compression/ICompressionCodec.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDoubleDelta(UInt8 data_bytes_size);
}
struct AuxiliaryRandomData
{
UInt8 data_bytes_size;
size_t decompressed_size;
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecDoubleDelta(p->data_bytes_size);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}
catch (...)
{
return 1;
}

View File

@ -0,0 +1,52 @@
#include <iostream>
#include <string>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key);
}
constexpr size_t key_size = 20;
struct AuxiliaryRandomData
{
char key[key_size];
size_t decompressed_size;
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData))
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
std::string key = std::string(p->key, key_size);
auto codec = DB::getCompressionCodecEncrypted(key);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
std::string input = std::string(reinterpret_cast<const char*>(data), size);
fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
if (output_buffer_size < size)
return 0;
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}
catch (...)
{
return 1;
}

View File

@ -0,0 +1,47 @@
#include <iostream>
#include <string>
#include <Compression/ICompressionCodec.h>
#include <IO/BufferWithOwnMemory.h>
#include <Compression/LZ4_decompress_faster.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecLZ4(int level);
}
struct AuxiliaryRandomData
{
size_t level;
size_t decompressed_size;
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try
{
if (size < sizeof(AuxiliaryRandomData) + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER)
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecLZ4(p->level);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
size -= LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER;
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
// std::string input = std::string(reinterpret_cast<const char*>(data), size);
// fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
DB::Memory<> memory;
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}
catch (...)
{
return 1;
}

View File

@ -165,10 +165,11 @@ public:
while (!read_buf.eof())
{
result.last_position = read_buf.count();
/// Read checksum
Checksum record_checksum;
readIntBinary(record_checksum, read_buf);
/// Initialization is required, otherwise checksums may fail
/// Read header
ChangelogRecord record;
readIntBinary(record.header.version, read_buf);
readIntBinary(record.header.index, read_buf);
@ -179,6 +180,7 @@ public:
if (record.header.version > CURRENT_CHANGELOG_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath);
/// Read data
if (record.header.blob_size != 0)
{
auto buffer = nuraft::buffer::alloc(record.header.blob_size);
@ -189,11 +191,13 @@ public:
else
record.blob = nullptr;
/// Check changelog integrity
if (previous_index != 0 && previous_index + 1 != record.header.index)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Previous log entry {}, next log entry {}, seems like some entries skipped", previous_index, record.header.index);
previous_index = record.header.index;
/// Compare checksums
Checksum checksum = computeRecordChecksum(record);
if (checksum != record_checksum)
{
@ -202,22 +206,25 @@ public:
filepath, record.header.version, record.header.index, record.header.blob_size);
}
/// Check for duplicated changelog ids
if (logs.count(record.header.index) != 0)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Duplicated index id {} in log {}", record.header.index, filepath);
result.entries_read += 1;
/// Read but skip this entry because our state is already more fresh
if (record.header.index < start_log_index)
{
continue;
}
/// Create log entry for read data
auto log_entry = nuraft::cs_new<nuraft::log_entry>(record.header.term, record.blob, record.header.value_type);
if (result.first_read_index == 0)
result.first_read_index = record.header.index;
/// Put it into in memory structure
logs.emplace(record.header.index, log_entry);
index_to_offset[record.header.index] = result.last_position;
if (result.entries_read % 50000 == 0)
LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.entries_read);
}
@ -235,6 +242,7 @@ public:
result.error = true;
tryLogCurrentException(log);
}
LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.entries_read);
return result;
@ -255,6 +263,7 @@ Changelog::Changelog(
, force_sync(force_sync_)
, log(log_)
{
/// Load all files in changelog directory
namespace fs = std::filesystem;
if (!fs::exists(changelogs_dir))
fs::create_directories(changelogs_dir);
@ -264,45 +273,70 @@ Changelog::Changelog(
auto file_description = getChangelogFileDescription(p.path());
existing_changelogs[file_description.from_log_index] = file_description;
}
if (existing_changelogs.empty())
LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir);
}
void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep)
{
uint64_t total_read = 0;
/// Amount of entries in last log index
uint64_t entries_in_last = 0;
uint64_t incomplete_log_index = 0;
/// Log idx of the first incomplete log (key in existing_changelogs)
int64_t first_incomplete_log_start_index = -1; /// if -1 then no incomplete log exists
ChangelogReadResult result{};
/// First log index which was read from all changelogs
uint64_t first_read_index = 0;
/// We must start to read from this log index
uint64_t start_to_read_from = last_commited_log_index;
/// If we need to have some reserved log read additional `logs_to_keep` logs
if (start_to_read_from > logs_to_keep)
start_to_read_from -= logs_to_keep;
else
start_to_read_from = 1;
/// At least we read something
bool started = false;
/// Got through changelog files in order of start_index
for (const auto & [changelog_start_index, changelog_description] : existing_changelogs)
{
entries_in_last = changelog_description.to_log_index - changelog_description.from_log_index + 1;
/// How many entries we have in the last changelog
entries_in_last = changelog_description.expectedEntriesCountInLog();
/// [from_log_index.>=.......start_to_read_from.....<=.to_log_index]
if (changelog_description.to_log_index >= start_to_read_from)
{
if (!started)
if (!started) /// still nothing was read
{
/// Our first log starts from the more fresh log_id than we required to read and this changelog is not empty log.
/// So we are missing something in our logs, but it's not dataloss, we will receive snapshot and required
/// entries from leader.
if (changelog_description.from_log_index > last_commited_log_index && (changelog_description.from_log_index - last_commited_log_index) > 1)
{
LOG_ERROR(log, "Some records was lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index);
incomplete_log_index = changelog_start_index;
first_incomplete_log_start_index = changelog_start_index;
break;
}
else if (changelog_description.from_log_index > start_to_read_from)
{
/// We don't have required amount of reserved logs, but nothing was lost.
LOG_WARNING(log, "Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index);
}
}
started = true;
ChangelogReader reader(changelog_description.path);
result = reader.readChangelog(logs, start_to_read_from, index_to_start_pos, log);
started = true;
/// Otherwise we have already initialized it
if (first_read_index == 0)
first_read_index = result.first_read_index;
@ -311,7 +345,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
/// May happen after truncate, crash or simply unfinished log
if (result.entries_read < entries_in_last)
{
incomplete_log_index = changelog_start_index;
first_incomplete_log_start_index = changelog_start_index;
break;
}
}
@ -319,14 +353,16 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
if (first_read_index != 0)
start_index = first_read_index;
else
else /// We just may have no logs (only snapshot)
start_index = last_commited_log_index;
if (incomplete_log_index != 0)
/// Found some broken or non finished logs
/// We have to remove broken data and continue to write into incomplete log.
if (first_incomplete_log_start_index != -1) /// otherwise all logs completed so just start a new one
{
auto start_remove_from = existing_changelogs.begin();
if (started)
start_remove_from = existing_changelogs.upper_bound(incomplete_log_index);
start_remove_from = existing_changelogs.upper_bound(first_incomplete_log_start_index);
/// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them.
for (auto itr = start_remove_from; itr != existing_changelogs.end();)
@ -340,6 +376,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
if (!existing_changelogs.empty())
{
auto description = existing_changelogs.rbegin()->second;
if (description.expectedEntriesCountInLog() != rotate_interval)
LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description.expectedEntriesCountInLog());
LOG_TRACE(log, "Continue to write into {}", description.path);
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, description.from_log_index);
current_writer->setEntriesWritten(result.entries_read);
@ -363,6 +402,7 @@ void Changelog::rotate(uint64_t new_start_log_index)
/// Flush previous log
flush();
/// Start new one
ChangelogFileDescription new_description;
new_description.prefix = DEFAULT_PREFIX;
new_description.from_log_index = new_start_log_index;
@ -378,7 +418,7 @@ void Changelog::rotate(uint64_t new_start_log_index)
ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_entry)
{
ChangelogRecord record;
record.header.version = ChangelogVersion::V0;
record.header.version = ChangelogVersion::V1;
record.header.index = index;
record.header.term = log_entry->get_term();
record.header.value_type = log_entry->get_val_type();
@ -401,10 +441,13 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry)
if (logs.empty())
start_index = index;
if (current_writer->getEntriesWritten() == rotate_interval)
const auto & current_changelog_description = existing_changelogs[current_writer->getStartIndex()];
const bool log_is_complete = current_writer->getEntriesWritten() == current_changelog_description.expectedEntriesCountInLog();
if (log_is_complete)
rotate(index);
auto offset = current_writer->appendRecord(buildRecord(index, log_entry));
const auto offset = current_writer->appendRecord(buildRecord(index, log_entry));
if (!index_to_start_pos.try_emplace(index, offset).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index);
@ -416,26 +459,31 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
if (index_to_start_pos.count(index) == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index);
bool go_to_previous_file = index < current_writer->getStartIndex();
/// This write_at require to overwrite everything in this file and also in previous file(s)
const bool go_to_previous_file = index < current_writer->getStartIndex();
if (go_to_previous_file)
{
auto index_changelog = existing_changelogs.lower_bound(index);
ChangelogFileDescription description;
if (index_changelog->first == index)
if (index_changelog->first == index) /// exactly this file starts from index
description = index_changelog->second;
else
description = std::prev(index_changelog)->second;
/// Initialize writer from this log file
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, index_changelog->first);
current_writer->setEntriesWritten(description.to_log_index - description.from_log_index + 1);
}
auto entries_written = current_writer->getEntriesWritten();
/// Truncate current file
current_writer->truncateToLength(index_to_start_pos[index]);
if (go_to_previous_file)
{
/// Remove all subsequent files
/// Remove all subsequent files if overwritten something in previous one
auto to_remove_itr = existing_changelogs.upper_bound(index);
for (auto itr = to_remove_itr; itr != existing_changelogs.end();)
{
@ -444,19 +492,22 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
}
}
auto entries_written = current_writer->getEntriesWritten();
/// Remove redundant logs from memory
/// Everything >= index must be removed
for (uint64_t i = index; ; ++i)
{
auto log_itr = logs.find(i);
if (log_itr == logs.end())
break;
logs.erase(log_itr);
index_to_start_pos.erase(i);
entries_written--;
}
current_writer->setEntriesWritten(entries_written);
/// Now we can actually override entry at index
appendEntry(index, log_entry);
}
@ -467,7 +518,6 @@ void Changelog::compact(uint64_t up_to_log_index)
/// Remove all completely outdated changelog files
if (itr->second.to_log_index <= up_to_log_index)
{
LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path);
std::erase_if(index_to_start_pos, [right_index = itr->second.to_log_index] (const auto & item) { return item.first <= right_index; });
std::filesystem::remove(itr->second.path);
@ -482,9 +532,10 @@ void Changelog::compact(uint64_t up_to_log_index)
LogEntryPtr Changelog::getLastEntry() const
{
/// This entry treaded in special way by NuRaft
static LogEntryPtr fake_entry = nuraft::cs_new<nuraft::log_entry>(0, nuraft::buffer::alloc(sizeof(uint64_t)));
uint64_t next_index = getNextEntryIndex() - 1;
const uint64_t next_index = getNextEntryIndex() - 1;
auto entry = logs.find(next_index);
if (entry == logs.end())
return fake_entry;

View File

@ -53,13 +53,19 @@ struct ChangelogFileDescription
uint64_t to_log_index;
std::string path;
/// How many entries should be stored in this log
uint64_t expectedEntriesCountInLog() const
{
return to_log_index - from_log_index + 1;
}
};
class ChangelogWriter;
/// Simplest changelog with files rotation.
/// No compression, no metadata, just entries with headers one by one
/// Able to read broken files/entries and discard them.
/// No compression, no metadata, just entries with headers one by one.
/// Able to read broken files/entries and discard them. Not thread safe.
class Changelog
{
@ -128,10 +134,16 @@ private:
const bool force_sync;
Poco::Logger * log;
/// Currently existing changelogs
std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
/// Current writer for changelog file
std::unique_ptr<ChangelogWriter> current_writer;
/// Mapping log_id -> binary offset in log file
IndexToOffset index_to_start_pos;
/// Mapping log_id -> log_entry
IndexToLogEntry logs;
/// Start log_id which exists in all "active" logs
uint64_t start_index = 0;
};

View File

@ -1,4 +1,4 @@
#include <Coordination/KeeperStorageDispatcher.h>
#include <Coordination/KeeperDispatcher.h>
#include <Common/setThreadName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <future>
@ -9,19 +9,18 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int TIMEOUT_EXCEEDED;
}
KeeperStorageDispatcher::KeeperStorageDispatcher()
KeeperDispatcher::KeeperDispatcher()
: coordination_settings(std::make_shared<CoordinationSettings>())
, log(&Poco::Logger::get("KeeperDispatcher"))
{
}
void KeeperStorageDispatcher::requestThread()
void KeeperDispatcher::requestThread()
{
setThreadName("KeeperReqT");
@ -133,7 +132,7 @@ void KeeperStorageDispatcher::requestThread()
}
}
void KeeperStorageDispatcher::responseThread()
void KeeperDispatcher::responseThread()
{
setThreadName("KeeperRspT");
while (!shutdown_called)
@ -159,7 +158,7 @@ void KeeperStorageDispatcher::responseThread()
}
}
void KeeperStorageDispatcher::snapshotThread()
void KeeperDispatcher::snapshotThread()
{
setThreadName("KeeperSnpT");
while (!shutdown_called)
@ -181,9 +180,11 @@ void KeeperStorageDispatcher::snapshotThread()
}
}
void KeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{
std::lock_guard lock(session_to_response_callback_mutex);
/// Special new session response.
if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::SessionID)
{
const Coordination::ZooKeeperSessionIDResponse & session_id_resp = dynamic_cast<const Coordination::ZooKeeperSessionIDResponse &>(*response);
@ -196,25 +197,28 @@ void KeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination
callback(response);
new_session_id_response_callback.erase(session_id_resp.internal_id);
}
else
else /// Normal response, just write to client
{
auto session_writer = session_to_response_callback.find(session_id);
if (session_writer == session_to_response_callback.end())
auto session_response_callback = session_to_response_callback.find(session_id);
/// Session was disconnected, just skip this response
if (session_response_callback == session_to_response_callback.end())
return;
session_writer->second(response);
session_response_callback->second(response);
/// Session closed, no more writes
if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
{
session_to_response_callback.erase(session_writer);
session_to_response_callback.erase(session_response_callback);
}
}
}
bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
{
{
/// If session was already disconnected than we will ignore requests
std::lock_guard lock(session_to_response_callback_mutex);
if (session_to_response_callback.count(session_id) == 0)
return false;
@ -237,7 +241,7 @@ bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr
return true;
}
void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper)
void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper)
{
LOG_DEBUG(log, "Initializing storage dispatcher");
int myid = config.getInt("keeper_server.server_id");
@ -251,6 +255,7 @@ void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration
server = std::make_unique<KeeperServer>(
myid, coordination_settings, config, responses_queue, snapshots_queue, standalone_keeper);
try
{
LOG_DEBUG(log, "Waiting server to initialize");
@ -266,13 +271,13 @@ void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration
throw;
}
/// Start it after keeper server start
session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
LOG_DEBUG(log, "Dispatcher initialized");
}
void KeeperStorageDispatcher::shutdown()
void KeeperDispatcher::shutdown()
{
try
{
@ -306,6 +311,8 @@ void KeeperStorageDispatcher::shutdown()
server->shutdown();
KeeperStorage::RequestForSession request_for_session;
/// Set session expired for all pending requests
while (requests_queue->tryPop(request_for_session))
{
if (request_for_session.request)
@ -320,6 +327,7 @@ void KeeperStorageDispatcher::shutdown()
}
}
/// Clear all registered sessions
std::lock_guard lock(session_to_response_callback_mutex);
session_to_response_callback.clear();
}
@ -331,19 +339,19 @@ void KeeperStorageDispatcher::shutdown()
LOG_DEBUG(log, "Dispatcher shut down");
}
KeeperStorageDispatcher::~KeeperStorageDispatcher()
KeeperDispatcher::~KeeperDispatcher()
{
shutdown();
}
void KeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
void KeeperDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
{
std::lock_guard lock(session_to_response_callback_mutex);
if (!session_to_response_callback.try_emplace(session_id, callback).second)
throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
}
void KeeperStorageDispatcher::sessionCleanerTask()
void KeeperDispatcher::sessionCleanerTask()
{
while (true)
{
@ -352,12 +360,16 @@ void KeeperStorageDispatcher::sessionCleanerTask()
try
{
/// Only leader node must check dead sessions
if (isLeader())
{
auto dead_sessions = server->getDeadSessions();
for (int64_t dead_session : dead_sessions)
{
LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
/// Close session == send close request to raft server
Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
request->xid = Coordination::CLOSE_XID;
KeeperStorage::RequestForSession request_info;
@ -367,6 +379,8 @@ void KeeperStorageDispatcher::sessionCleanerTask()
std::lock_guard lock(push_request_mutex);
requests_queue->push(std::move(request_info));
}
/// Remove session from registered sessions
finishSession(dead_session);
LOG_INFO(log, "Dead session close request pushed");
}
@ -381,7 +395,7 @@ void KeeperStorageDispatcher::sessionCleanerTask()
}
}
void KeeperStorageDispatcher::finishSession(int64_t session_id)
void KeeperDispatcher::finishSession(int64_t session_id)
{
std::lock_guard lock(session_to_response_callback_mutex);
auto session_it = session_to_response_callback.find(session_id);
@ -389,7 +403,7 @@ void KeeperStorageDispatcher::finishSession(int64_t session_id)
session_to_response_callback.erase(session_it);
}
void KeeperStorageDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error)
void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error)
{
for (const auto & [session_id, request] : requests_for_sessions)
{
@ -402,7 +416,7 @@ void KeeperStorageDispatcher::addErrorResponses(const KeeperStorage::RequestsFor
}
}
void KeeperStorageDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
{
if (!result->has_result())
result->get();
@ -417,10 +431,14 @@ void KeeperStorageDispatcher::forceWaitAndProcessResult(RaftAppendResult & resul
requests_for_sessions.clear();
}
int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms)
int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
{
/// New session id allocation is a special request, because we cannot process it in normal
/// way: get request -> put to raft -> set response for registered callback.
KeeperStorage::RequestForSession request_info;
std::shared_ptr<Coordination::ZooKeeperSessionIDRequest> request = std::make_shared<Coordination::ZooKeeperSessionIDRequest>();
/// Internal session id. It's a temporary number which is unique for each client on this server
/// but can be same on different servers.
request->internal_id = internal_session_id_counter.fetch_add(1);
request->session_timeout_ms = session_timeout_ms;
request->server_id = server->getServerID();
@ -430,6 +448,7 @@ int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms)
auto promise = std::make_shared<std::promise<int64_t>>();
auto future = promise->get_future();
{
std::lock_guard lock(session_to_response_callback_mutex);
new_session_id_response_callback[request->internal_id] = [promise, internal_id = request->internal_id] (const Coordination::ZooKeeperResponsePtr & response)
@ -452,6 +471,7 @@ int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms)
};
}
/// Push new session request to queue
{
std::lock_guard lock(push_request_mutex);
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms))
@ -461,6 +481,8 @@ int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms)
if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready)
throw Exception("Cannot receive session id within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
/// Forcefully wait for request execution because we cannot process any other
/// requests for this client until it get new session id.
return future.get();
}

View File

@ -22,7 +22,9 @@ namespace DB
using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
class KeeperStorageDispatcher
/// Highlevel wrapper for ClickHouse Keeper.
/// Process user requests via consensus and return responses.
class KeeperDispatcher
{
private:
@ -45,6 +47,7 @@ private:
/// (get, set, list, etc.). Dispatcher determines callback for each response
/// using session id from this map.
SessionToResponseCallback session_to_response_callback;
/// But when client connects to the server for the first time it doesn't
/// have session_id. It request it from server. We give temporary
/// internal id for such requests just to much client with its response.
@ -60,7 +63,7 @@ private:
/// Dumping new snapshots to disk
ThreadFromGlobalPool snapshot_thread;
/// RAFT wrapper. Most important class.
/// RAFT wrapper.
std::unique_ptr<KeeperServer> server;
Poco::Logger * log;
@ -69,10 +72,15 @@ private:
std::atomic<int64_t> internal_session_id_counter{0};
private:
/// Thread put requests to raft
void requestThread();
/// Thread put responses for subscribed sessions
void responseThread();
/// Thread clean disconnected sessions from memory
void sessionCleanerTask();
/// Thread create snapshots in the background
void snapshotThread();
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
/// Add error responses for requests to responses queue.
@ -84,16 +92,23 @@ private:
void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions);
public:
KeeperStorageDispatcher();
/// Just allocate some objects, real initialization is done by `intialize method`
KeeperDispatcher();
/// Call shutdown
~KeeperDispatcher();
/// Initialization from config.
/// standalone_keeper -- we are standalone keeper application (not inside clickhouse server)
void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper);
/// Shutdown internal keeper parts (server, state machine, log storage, etc)
void shutdown();
~KeeperStorageDispatcher();
/// Put request to ClickHouse Keeper
bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
/// Are we leader
bool isLeader() const
{
return server->isLeader();
@ -104,9 +119,12 @@ public:
return server->isLeaderAlive();
}
/// Get new session ID
int64_t getSessionID(int64_t session_timeout_ms);
/// Register session and subscribe for responses with callback
void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
/// Call if we don't need any responses for this session no more (session was expired)
void finishSession(int64_t session_id);
};

View File

@ -9,39 +9,53 @@
namespace DB
{
/// Wrapper around Changelog class. Implements RAFT log storage.
class KeeperLogStore : public nuraft::log_store
{
public:
KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_);
/// Read log storage from filesystem starting from last_commited_log_index
void init(uint64_t last_commited_log_index, uint64_t logs_to_keep);
uint64_t start_index() const override;
uint64_t next_slot() const override;
/// return last entry from log
nuraft::ptr<nuraft::log_entry> last_entry() const override;
/// Append new entry to log
uint64_t append(nuraft::ptr<nuraft::log_entry> & entry) override;
/// Remove all entries starting from index and write entry into index position
void write_at(uint64_t index, nuraft::ptr<nuraft::log_entry> & entry) override;
/// Return entries between [start, end)
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> log_entries(uint64_t start, uint64_t end) override;
/// Return entry at index
nuraft::ptr<nuraft::log_entry> entry_at(uint64_t index) override;
/// Term if the index
uint64_t term_at(uint64_t index) override;
/// Serialize entries in interval [index, index + cnt)
nuraft::ptr<nuraft::buffer> pack(uint64_t index, int32_t cnt) override;
/// Apply serialized entries starting from index
void apply_pack(uint64_t index, nuraft::buffer & pack) override;
/// Entries from last_log_index can be removed from memory and from disk
bool compact(uint64_t last_log_index) override;
/// Call fsync to the stored data
bool flush() override;
/// Current log storage size
uint64_t size() const;
/// Flush batch of appended entries
void end_of_append_batch(uint64_t start_index, uint64_t count) override;
private:

View File

@ -38,6 +38,8 @@ private:
Poco::Logger * log;
/// Callback func which is called by NuRaft on all internal events.
/// Used to determine the moment when raft is ready to server new requests
nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
/// Almost copy-paste from nuraft::launcher, but with separated server init and start
@ -57,18 +59,25 @@ public:
SnapshotsQueue & snapshots_queue_,
bool standalone_keeper);
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
void startup();
/// Put local read request and execute in state machine directly and response into
/// responses queue
void putLocalReadRequest(const KeeperStorage::RequestForSession & request);
/// Put batch of requests into Raft and get result of put. Responses will be set separately into
/// responses_queue.
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
/// Return set of the non-active sessions
std::unordered_set<int64_t> getDeadSessions();
bool isLeader() const;
bool isLeaderAlive() const;
/// Wait server initialization (see callbackFunc)
void waitInit();
void shutdown();

View File

@ -14,29 +14,32 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
KeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
namespace
{
ReadBufferFromNuraftBuffer buffer(data);
KeeperStorage::RequestForSession request_for_session;
readIntBinary(request_for_session.session_id, buffer);
KeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
{
ReadBufferFromNuraftBuffer buffer(data);
KeeperStorage::RequestForSession request_for_session;
readIntBinary(request_for_session.session_id, buffer);
int32_t length;
Coordination::read(length, buffer);
int32_t length;
Coordination::read(length, buffer);
int32_t xid;
Coordination::read(xid, buffer);
int32_t xid;
Coordination::read(xid, buffer);
Coordination::OpNum opnum;
Coordination::OpNum opnum;
Coordination::read(opnum, buffer);
Coordination::read(opnum, buffer);
request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
request_for_session.request->xid = xid;
request_for_session.request->readImpl(buffer);
return request_for_session;
request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
request_for_session.request->xid = xid;
request_for_session.request->readImpl(buffer);
return request_for_session;
}
}
KeeperStateMachine::KeeperStateMachine(
KeeperStateMachine::KeeperStateMachine(
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_,
const std::string & snapshots_path_,
@ -58,6 +61,7 @@ void KeeperStateMachine::init()
LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots());
bool loaded = false;
bool has_snapshots = snapshot_manager.totalSnapshots() != 0;
/// Deserialize latest snapshot from disk
while (snapshot_manager.totalSnapshots() != 0)
{
uint64_t latest_log_index = snapshot_manager.getLatestSnapshotIndex();
@ -97,6 +101,7 @@ void KeeperStateMachine::init()
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
{
auto request_for_session = parseRequest(data);
/// Special processing of session_id request
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
{
const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast<const Coordination::ZooKeeperSessionIDRequest &>(*request_for_session.request);
@ -136,7 +141,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
{
LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
nuraft::ptr<nuraft::buffer> latest_snapshot_ptr;
{
{ /// save snapshot into memory
std::lock_guard lock(snapshots_lock);
if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Required to apply snapshot with last log index {}, but our last log index is {}",
@ -144,10 +149,11 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
latest_snapshot_ptr = latest_snapshot_buf;
}
{
{ /// deserialize and apply snapshot to storage
std::lock_guard lock(storage_lock);
std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
}
last_committed_idx = s.get_last_log_idx();
return true;
}
@ -168,18 +174,19 @@ void KeeperStateMachine::create_snapshot(
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf);
CreateSnapshotTask snapshot_task;
{
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
std::lock_guard lock(storage_lock);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
}
/// create snapshot task for background execution (in snapshot thread)
snapshot_task.create_snapshot = [this, when_done] (KeeperStorageSnapshotPtr && snapshot)
{
nuraft::ptr<std::exception> exception(nullptr);
bool ret = true;
try
{
{
{ /// Read storage data without locks and create snapshot
std::lock_guard lock(snapshots_lock);
auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot);
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx());
@ -192,6 +199,7 @@ void KeeperStateMachine::create_snapshot(
{
/// Must do it with lock (clearing elements from list)
std::lock_guard lock(storage_lock);
/// Turn off "snapshot mode" and clear outdate part of storage state
storage->clearGarbageAfterSnapshot();
/// Destroy snapshot with lock
snapshot.reset();
@ -209,7 +217,9 @@ void KeeperStateMachine::create_snapshot(
when_done(ret, exception);
};
LOG_DEBUG(log, "In memory snapshot {} created, queueing task to flash to disk", s.get_last_log_idx());
/// Flush snapshot to disk in a separate thread.
snapshots_queue.push(std::move(snapshot_task));
}
@ -224,7 +234,7 @@ void KeeperStateMachine::save_logical_snp_obj(
nuraft::ptr<nuraft::buffer> cloned_buffer;
nuraft::ptr<nuraft::snapshot> cloned_meta;
if (obj_id == 0)
if (obj_id == 0) /// Fake snapshot required by NuRaft at startup
{
std::lock_guard lock(storage_lock);
KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
@ -232,15 +242,18 @@ void KeeperStateMachine::save_logical_snp_obj(
}
else
{
/// copy snapshot into memory
cloned_buffer = nuraft::buffer::clone(data);
}
/// copy snapshot meta into memory
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
try
{
std::lock_guard lock(snapshots_lock);
/// Serialize snapshot to disk and switch in memory pointers.
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, s.get_last_log_idx());
latest_snapshot_buf = cloned_buffer;
latest_snapshot_meta = cloned_meta;
@ -262,7 +275,7 @@ int KeeperStateMachine::read_logical_snp_obj(
{
LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
if (obj_id == 0)
if (obj_id == 0) /// Fake snapshot required by NuRaft at startup
{
data_out = nuraft::buffer::alloc(sizeof(int32_t));
nuraft::buffer_serializer bs(data_out);
@ -272,6 +285,8 @@ int KeeperStateMachine::read_logical_snp_obj(
else
{
std::lock_guard lock(snapshots_lock);
/// Our snapshot is not equal to required. Maybe we still creating it in the background.
/// Let's wait and NuRaft will retry this call.
if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx())
{
LOG_WARNING(log, "Required to apply snapshot with last log index {}, but our last log index is {}. Will ignore this one and retry",
@ -281,11 +296,13 @@ int KeeperStateMachine::read_logical_snp_obj(
data_out = nuraft::buffer::clone(*latest_snapshot_buf);
is_last_obj = true;
}
return 1;
}
void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session)
{
/// Pure local request, just process it with storage
KeeperStorage::ResponsesForSessions responses;
{
std::lock_guard lock(storage_lock);

View File

@ -13,6 +13,8 @@ namespace DB
using ResponsesQueue = ThreadSafeQueue<KeeperStorage::ResponseForSession>;
using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
/// ClickHouse Keeper state machine. Wrapper for KeeperStorage.
/// Responsible for entries commit, snapshots creation and so on.
class KeeperStateMachine : public nuraft::state_machine
{
public:
@ -21,24 +23,30 @@ public:
const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_,
const std::string & superdigest_ = "");
/// Read state from the latest snapshot
void init();
/// Currently not supported
nuraft::ptr<nuraft::buffer> pre_commit(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override;
/// Currently not supported
void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {}
uint64_t last_commit_index() override { return last_committed_idx; }
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
bool apply_snapshot(nuraft::snapshot & s) override;
nuraft::ptr<nuraft::snapshot> last_snapshot() override;
/// Create new snapshot from current state.
void create_snapshot(
nuraft::snapshot & s,
nuraft::async_result<bool>::handler_type & when_done) override;
/// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot.
void save_logical_snp_obj(
nuraft::snapshot & s,
uint64_t & obj_id,
@ -46,6 +54,8 @@ public:
bool is_first_obj,
bool is_last_obj) override;
/// Better name is `serialize snapshot` -- save existing snapshot (created by create_snapshot) into
/// in-memory buffer data_out.
int read_logical_snp_obj(
nuraft::snapshot & s,
void* & user_snp_ctx,
@ -58,6 +68,7 @@ public:
return *storage;
}
/// Process local read request
void processReadRequest(const KeeperStorage::RequestForSession & request_for_session);
std::unordered_set<int64_t> getDeadSessions();
@ -66,18 +77,25 @@ public:
private:
/// In our state machine we always have a single snapshot which is stored
/// in memory in compressed (serialized) format.
SnapshotMetadataPtr latest_snapshot_meta = nullptr;
nuraft::ptr<nuraft::buffer> latest_snapshot_buf = nullptr;
CoordinationSettingsPtr coordination_settings;
/// Main state machine logic
KeeperStoragePtr storage;
/// Save/Load and Serialize/Deserialize logic for snapshots.
KeeperSnapshotManager snapshot_manager;
/// Put processed responses into this queue
ResponsesQueue & responses_queue;
/// Snapshots to create by snapshot thread
SnapshotsQueue & snapshots_queue;
/// Mutex for snapshots
std::mutex snapshots_lock;
@ -88,6 +106,7 @@ private:
std::atomic<uint64_t> last_committed_idx;
Poco::Logger * log;
/// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest;
};

View File

@ -151,19 +151,39 @@ static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & pat
}
auto parent_path = parentPath(path);
it = list_watches.find(parent_path);
if (it != list_watches.end())
{
std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_list_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
watch_list_response->path = parent_path;
watch_list_response->xid = Coordination::WATCH_XID;
watch_list_response->zxid = -1;
watch_list_response->type = Coordination::Event::CHILD;
watch_list_response->state = Coordination::State::CONNECTED;
for (auto watcher_session : it->second)
result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response});
list_watches.erase(it);
Strings paths_to_check_for_list_watches;
if (event_type == Coordination::Event::CREATED)
{
paths_to_check_for_list_watches.push_back(parent_path); /// Trigger list watches for parent
}
else if (event_type == Coordination::Event::DELETED)
{
paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path
paths_to_check_for_list_watches.push_back(parent_path); /// And for parent path
}
/// CHANGED event never trigger list wathes
for (const auto & path_to_check : paths_to_check_for_list_watches)
{
it = list_watches.find(path_to_check);
if (it != list_watches.end())
{
std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_list_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
watch_list_response->path = path_to_check;
watch_list_response->xid = Coordination::WATCH_XID;
watch_list_response->zxid = -1;
if (path_to_check == parent_path)
watch_list_response->type = Coordination::Event::CHILD;
else
watch_list_response->type = Coordination::Event::DELETED;
watch_list_response->state = Coordination::State::CONNECTED;
for (auto watcher_session : it->second)
result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response});
list_watches.erase(it);
}
}
return result;
}
@ -177,32 +197,32 @@ KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_)
using Undo = std::function<void()>;
struct KeeperStorageRequest
struct KeeperStorageRequestProcessor
{
Coordination::ZooKeeperRequestPtr zk_request;
explicit KeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_)
: zk_request(zk_request_)
{}
virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t zxid, int64_t session_id) const = 0;
virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; }
virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; }
virtual ~KeeperStorageRequest() = default;
virtual ~KeeperStorageRequestProcessor() = default;
};
struct KeeperStorageHeartbeatRequest final : public KeeperStorageRequest
struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */) const override
{
return {zk_request->makeResponse(), {}};
}
};
struct KeeperStorageSyncRequest final : public KeeperStorageRequest
struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */) const override
{
auto response = zk_request->makeResponse();
@ -212,9 +232,9 @@ struct KeeperStorageSyncRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageCreateRequest final : public KeeperStorageRequest
struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override
{
@ -363,7 +383,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageGetRequest final : public KeeperStorageRequest
struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
@ -381,7 +401,7 @@ struct KeeperStorageGetRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Read, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /* zxid */, int64_t /* session_id */) const override
{
auto & container = storage.container;
@ -423,7 +443,7 @@ namespace
}
}
struct KeeperStorageRemoveRequest final : public KeeperStorageRequest
struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -440,7 +460,7 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Delete, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/) const override
{
auto & container = storage.container;
@ -520,9 +540,9 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageExistsRequest final : public KeeperStorageRequest
struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */) const override
{
auto & container = storage.container;
@ -546,7 +566,7 @@ struct KeeperStorageExistsRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageSetRequest final : public KeeperStorageRequest
struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -563,7 +583,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Write, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t zxid, int64_t /* session_id */) const override
{
auto & container = storage.container;
@ -624,7 +644,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageListRequest final : public KeeperStorageRequest
struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -641,7 +661,7 @@ struct KeeperStorageListRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Read, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
auto & container = storage.container;
@ -669,7 +689,7 @@ struct KeeperStorageListRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageCheckRequest final : public KeeperStorageRequest
struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -686,7 +706,7 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Read, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
auto & container = storage.container;
@ -713,7 +733,7 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest
};
struct KeeperStorageSetACLRequest final : public KeeperStorageRequest
struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -730,7 +750,7 @@ struct KeeperStorageSetACLRequest final : public KeeperStorageRequest
return checkACL(Coordination::ACL::Admin, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override
{
@ -777,7 +797,7 @@ struct KeeperStorageSetACLRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageGetACLRequest final : public KeeperStorageRequest
struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -794,7 +814,7 @@ struct KeeperStorageGetACLRequest final : public KeeperStorageRequest
/// LOL, GetACL require more permissions, then SetACL...
return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
@ -817,7 +837,7 @@ struct KeeperStorageGetACLRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageMultiRequest final : public KeeperStorageRequest
struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
@ -827,9 +847,9 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest
return true;
}
std::vector<KeeperStorageRequestPtr> concrete_requests;
explicit KeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
: KeeperStorageRequest(zk_request_)
std::vector<KeeperStorageRequestProcessorPtr> concrete_requests;
explicit KeeperStorageMultiRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_)
: KeeperStorageRequestProcessor(zk_request_)
{
Coordination::ZooKeeperMultiRequest & request = dynamic_cast<Coordination::ZooKeeperMultiRequest &>(*zk_request);
concrete_requests.reserve(request.requests.size());
@ -839,19 +859,19 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest
auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_request);
if (sub_zk_request->getOpNum() == Coordination::OpNum::Create)
{
concrete_requests.push_back(std::make_shared<KeeperStorageCreateRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageCreateRequestProcessor>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove)
{
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequestProcessor>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set)
{
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequestProcessor>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check)
{
concrete_requests.push_back(std::make_shared<KeeperStorageCheckRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageCheckRequestProcessor>(sub_zk_request));
}
else
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
@ -923,18 +943,18 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageCloseRequest final : public KeeperStorageRequest
struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage &, int64_t, int64_t) const override
{
throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR);
}
};
struct KeeperStorageAuthRequest final : public KeeperStorageRequest
struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor
{
using KeeperStorageRequest::KeeperStorageRequest;
using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override
{
Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast<Coordination::ZooKeeperAuthRequest &>(*zk_request);
@ -988,20 +1008,20 @@ void KeeperStorage::finalize()
}
class KeeperWrapperFactory final : private boost::noncopyable
class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable
{
public:
using Creator = std::function<KeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
using Creator = std::function<KeeperStorageRequestProcessorPtr(const Coordination::ZooKeeperRequestPtr &)>;
using OpNumToRequest = std::unordered_map<Coordination::OpNum, Creator>;
static KeeperWrapperFactory & instance()
static KeeperStorageRequestProcessorsFactory & instance()
{
static KeeperWrapperFactory factory;
static KeeperStorageRequestProcessorsFactory factory;
return factory;
}
KeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
{
auto it = op_num_to_request.find(zk_request->getOpNum());
if (it == op_num_to_request.end())
@ -1018,33 +1038,33 @@ public:
private:
OpNumToRequest op_num_to_request;
KeeperWrapperFactory();
KeeperStorageRequestProcessorsFactory();
};
template<Coordination::OpNum num, typename RequestT>
void registerKeeperRequestWrapper(KeeperWrapperFactory & factory)
void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory)
{
factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared<RequestT>(zk_request); });
}
KeeperWrapperFactory::KeeperWrapperFactory()
KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory()
{
registerKeeperRequestWrapper<Coordination::OpNum::Heartbeat, KeeperStorageHeartbeatRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Sync, KeeperStorageSyncRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Auth, KeeperStorageAuthRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Close, KeeperStorageCloseRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Create, KeeperStorageCreateRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Remove, KeeperStorageRemoveRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Exists, KeeperStorageExistsRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Get, KeeperStorageGetRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Set, KeeperStorageSetRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::List, KeeperStorageListRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::SimpleList, KeeperStorageListRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Check, KeeperStorageCheckRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Multi, KeeperStorageMultiRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::SetACL, KeeperStorageSetACLRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::GetACL, KeeperStorageGetACLRequest>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Heartbeat, KeeperStorageHeartbeatRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Sync, KeeperStorageSyncRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Auth, KeeperStorageAuthRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Close, KeeperStorageCloseRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Create, KeeperStorageCreateRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Remove, KeeperStorageRemoveRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Exists, KeeperStorageExistsRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Get, KeeperStorageGetRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Set, KeeperStorageSetRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::List, KeeperStorageListRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::SimpleList, KeeperStorageListRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Check, KeeperStorageCheckRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::Multi, KeeperStorageMultiRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::SetACL, KeeperStorageSetACLRequestProcessor>(*this);
registerKeeperRequestProcessor<Coordination::OpNum::GetACL, KeeperStorageGetACLRequestProcessor>(*this);
}
@ -1059,7 +1079,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
}
session_expiry_queue.update(session_id, session_and_timeout[session_id]);
if (zk_request->getOpNum() == Coordination::OpNum::Close)
if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special
{
auto it = ephemerals.find(session_id);
if (it != ephemerals.end())
@ -1092,21 +1113,21 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
session_and_timeout.erase(session_id);
results.push_back(ResponseForSession{session_id, response});
}
else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat)
else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special
{
KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request);
auto [response, _] = storage_request->process(*this, zxid, session_id);
response->xid = zk_request->xid;
response->zxid = getZXID();
results.push_back(ResponseForSession{session_id, response});
}
else
else /// normal requests proccession
{
KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request);
Coordination::ZooKeeperResponsePtr response;
if (check_acl && !storage_request->checkAuth(*this, session_id))
if (check_acl && !request_processor->checkAuth(*this, session_id))
{
response = zk_request->makeResponse();
/// Original ZooKeeper always throws no auth, even when user provided some credentials
@ -1114,9 +1135,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
}
else
{
std::tie(response, std::ignore) = storage_request->process(*this, zxid, session_id);
std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id);
}
/// Watches for this requests are added to the watches lists
if (zk_request->has_watch)
{
if (response->error == Coordination::Error::ZOK)
@ -1135,9 +1157,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
}
}
/// If this requests processed successfully we need to check watches
if (response->error == Coordination::Error::ZOK)
{
auto watch_responses = storage_request->processWatches(watches, list_watches);
auto watch_responses = request_processor->processWatches(watches, list_watches);
results.insert(results.end(), watch_responses.begin(), watch_responses.end());
}
@ -1153,11 +1176,13 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
void KeeperStorage::clearDeadWatches(int64_t session_id)
{
/// Clear all watches for this session
auto watches_it = sessions_and_watchers.find(session_id);
if (watches_it != sessions_and_watchers.end())
{
for (const auto & watch_path : watches_it->second)
{
/// Maybe it's a normal watch
auto watch = watches.find(watch_path);
if (watch != watches.end())
{
@ -1173,6 +1198,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id)
watches.erase(watch);
}
/// Maybe it's a list watch
auto list_watch = list_watches.find(watch_path);
if (list_watch != list_watches.end())
{
@ -1188,6 +1214,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id)
list_watches.erase(list_watch);
}
}
sessions_and_watchers.erase(watches_it);
}
}

View File

@ -15,14 +15,17 @@ namespace DB
{
using namespace DB;
struct KeeperStorageRequest;
using KeeperStorageRequestPtr = std::shared_ptr<KeeperStorageRequest>;
struct KeeperStorageRequestProcessor;
using KeeperStorageRequestProcessorPtr = std::shared_ptr<KeeperStorageRequestProcessor>;
using ResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr &)>;
using ChildrenSet = std::unordered_set<std::string>;
using SessionAndTimeout = std::unordered_map<int64_t, int64_t>;
struct KeeperStorageSnapshot;
/// Keeper state machine almost equal to the ZooKeeper's state machine.
/// Implements all logic of operations, data changes, sessions allocation.
/// In-memory and not thread safe.
class KeeperStorage
{
public:
@ -77,21 +80,34 @@ public:
using Watches = std::map<String /* path, relative of root_path */, SessionIDs>;
/// Main hashtable with nodes. Contain all information about data.
/// All other structures expect session_and_timeout can be restored from
/// container.
Container container;
/// Mapping session_id -> set of ephemeral nodes paths
Ephemerals ephemerals;
/// Mapping sessuib_id -> set of watched nodes paths
SessionAndWatcher sessions_and_watchers;
/// Expiration queue for session, allows to get dead sessions at some point of time
SessionExpiryQueue session_expiry_queue;
/// All active sessions with timeout
SessionAndTimeout session_and_timeout;
/// ACLMap for more compact ACLs storage inside nodes.
ACLMap acl_map;
/// Global id of all requests applied to storage
int64_t zxid{0};
bool finalized{false};
/// Currently active watches (node_path -> subscribed sessions)
Watches watches;
Watches list_watches; /// Watches for 'list' request (watches on children).
void clearDeadWatches(int64_t session_id);
/// Get current zxid
int64_t getZXID() const
{
return zxid;
@ -102,6 +118,7 @@ public:
public:
KeeperStorage(int64_t tick_time_ms, const String & superdigest_);
/// Allocate new session id with the specified timeouts
int64_t getSessionID(int64_t session_timeout_ms)
{
auto result = session_id_counter++;
@ -110,21 +127,28 @@ public:
return result;
}
/// Add session id. Used when restoring KeeperStorage from snapshot.
void addSessionID(int64_t session_id, int64_t session_timeout_ms)
{
session_and_timeout.emplace(session_id, session_timeout_ms);
session_expiry_queue.update(session_id, session_timeout_ms);
}
/// Process user request and return response.
/// check_acl = false only when converting data from ZooKeeper.
ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional<int64_t> new_last_zxid, bool check_acl = true);
void finalize();
/// Set of methods for creating snapshots
/// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version.
void enableSnapshotMode()
{
container.enableSnapshotMode();
}
/// Turn off snapshot mode.
void disableSnapshotMode()
{
container.disableSnapshotMode();
@ -135,16 +159,19 @@ public:
return container.begin();
}
/// Clear outdated data from internal container.
void clearGarbageAfterSnapshot()
{
container.clearOutdatedNodes();
}
/// Get all active sessions
const SessionAndTimeout & getActiveSessions() const
{
return session_and_timeout;
}
/// Get all dead sessions
std::unordered_set<int64_t> getDeadSessions()
{
return session_expiry_queue.getExpiredSessions();

View File

@ -1299,6 +1299,82 @@ TEST(CoordinationTest, TestEphemeralNodeRemove)
}
TEST(CoordinationTest, TestRotateIntervalChanges)
{
using namespace Coordination;
ChangelogDirTest snapshots("./logs");
{
DB::KeeperLogStore changelog("./logs", 100, true);
changelog.init(0, 3);
for (size_t i = 1; i < 55; ++i)
{
std::shared_ptr<ZooKeeperCreateRequest> request = std::make_shared<ZooKeeperCreateRequest>();
request->path = "/hello_" + std::to_string(i);
auto entry = getLogEntryFromZKRequest(0, 1, request);
changelog.append(entry);
changelog.end_of_append_batch(0, 0);
}
}
EXPECT_TRUE(fs::exists("./logs/changelog_0_99.bin"));
DB::KeeperLogStore changelog_1("./logs", 10, true);
changelog_1.init(0, 50);
for (size_t i = 0; i < 55; ++i)
{
std::shared_ptr<ZooKeeperCreateRequest> request = std::make_shared<ZooKeeperCreateRequest>();
request->path = "/hello_" + std::to_string(100 + i);
auto entry = getLogEntryFromZKRequest(0, 1, request);
changelog_1.append(entry);
changelog_1.end_of_append_batch(0, 0);
}
EXPECT_TRUE(fs::exists("./logs/changelog_0_99.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_100_109.bin"));
DB::KeeperLogStore changelog_2("./logs", 7, true);
changelog_2.init(98, 55);
for (size_t i = 0; i < 17; ++i)
{
std::shared_ptr<ZooKeeperCreateRequest> request = std::make_shared<ZooKeeperCreateRequest>();
request->path = "/hello_" + std::to_string(200 + i);
auto entry = getLogEntryFromZKRequest(0, 1, request);
changelog_2.append(entry);
changelog_2.end_of_append_batch(0, 0);
}
changelog_2.compact(105);
EXPECT_FALSE(fs::exists("./logs/changelog_0_99.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_100_109.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_110_116.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_117_123.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_124_130.bin"));
DB::KeeperLogStore changelog_3("./logs", 5, true);
changelog_3.init(116, 3);
for (size_t i = 0; i < 17; ++i)
{
std::shared_ptr<ZooKeeperCreateRequest> request = std::make_shared<ZooKeeperCreateRequest>();
request->path = "/hello_" + std::to_string(300 + i);
auto entry = getLogEntryFromZKRequest(0, 1, request);
changelog_3.append(entry);
changelog_3.end_of_append_batch(0, 0);
}
changelog_3.compact(125);
EXPECT_FALSE(fs::exists("./logs/changelog_100_109.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_110_116.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_117_123.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_124_130.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_131_135.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_136_140.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_141_145.bin"));
}
int main(int argc, char ** argv)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));

View File

@ -97,7 +97,7 @@ class IColumn;
M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \
\
M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
\
M(LoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.", 0) \
M(UInt64, load_balancing_first_offset, 0, "Which replica to preferably send a query when FIRST_OR_RANDOM load balancing strategy is used.", 0) \
@ -125,7 +125,7 @@ class IColumn;
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
M(UInt64, distributed_push_down_limit, 1, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, true, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards)", 0) \
@ -482,6 +482,8 @@ class IColumn;
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
M(UInt64, external_storage_connect_timeout, 100, "Connect timeout for external database (Now supported for MySQL)", 0) \
M(UInt64, external_storage_rw_timeout, 1800, "Read / write timeout for external database (Now supported for MySQL)", 0) \
M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
@ -512,6 +514,7 @@ class IColumn;
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Obsolete setting, does nothing.", 0) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -526,7 +526,18 @@ void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadCon
was_cancelled = true;
if (read_context && *read_context)
{
/// The timer should be set for query cancellation to avoid query cancellation hung.
///
/// Since in case the remote server will abnormally terminated, neither
/// FIN nor RST packet will be sent, and the initiator will not know that
/// the connection died (unless tcp_keep_alive_timeout > 0).
///
/// Also note that it is possible to get this situation even when
/// enough data already had been read.
(*read_context)->setTimer();
(*read_context)->cancel();
}
connections->sendCancel();

View File

@ -100,7 +100,7 @@ void RemoteQueryExecutorReadContext::setConnectionFD(int fd, Poco::Timespan time
connection_fd = fd;
epoll.add(connection_fd);
receive_timeout = timeout;
receive_timeout_usec = timeout.totalMicroseconds();
connection_fd_description = fd_description;
}
@ -157,8 +157,8 @@ void RemoteQueryExecutorReadContext::setTimer() const
/// Did not get packet yet. Init timeout for the next async reading.
timer.reset();
if (receive_timeout.totalMicroseconds())
timer.setRelative(receive_timeout);
if (receive_timeout_usec)
timer.setRelative(receive_timeout_usec);
}
bool RemoteQueryExecutorReadContext::resumeRoutine()

View File

@ -34,7 +34,8 @@ public:
/// This mutex for fiber is needed because fiber could be destroyed in cancel method from another thread.
std::mutex fiber_lock;
Poco::Timespan receive_timeout;
/// atomic is required due to data-race between setConnectionFD() and setTimer() from the cancellation path.
std::atomic<uint64_t> receive_timeout_usec = 0;
IConnections & connections;
Poco::Net::Socket * last_used_socket = nullptr;
@ -75,6 +76,7 @@ class RemoteQueryExecutorReadContext
{
public:
void cancel() {}
void setTimer() {}
};
}

View File

@ -76,17 +76,17 @@ TTLBlockInputStream::TTLBlockInputStream(
algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
description, old_ttl_infos.columns_ttl[name], current_time_,
force_, name, default_expression, default_column_name));
force_, name, default_expression, default_column_name, isCompactPart(data_part)));
}
}
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
algorithms.emplace_back(std::make_unique<TTLMoveAlgorithm>(
move_ttl, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
algorithms.emplace_back(std::make_unique<TTLRecompressionAlgorithm>(
recompression_ttl, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
Block reorderColumns(Block block, const Block & header)

View File

@ -0,0 +1,77 @@
#include <DataStreams/TTLCalcInputStream.h>
#include <DataStreams/TTLUpdateInfoAlgorithm.h>
namespace DB
{
TTLCalcInputStream::TTLCalcInputStream(
const BlockInputStreamPtr & input_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_,
bool force_)
: data_part(data_part_)
, log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcInputStream)"))
{
children.push_back(input_);
header = children.at(0)->getHeader();
auto old_ttl_infos = data_part->ttl_infos;
if (metadata_snapshot_->hasRowsTTL())
{
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
rows_ttl, TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_));
}
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
where_ttl, TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
group_by_ttl, TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_));
if (metadata_snapshot_->hasAnyColumnTTL())
{
for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
{
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
description, TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_));
}
}
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
Block TTLCalcInputStream::readImpl()
{
auto block = children.at(0)->read();
for (const auto & algorithm : algorithms)
algorithm->execute(block);
if (!block)
return block;
Block res;
for (const auto & col : header)
res.insert(block.getByName(col.name));
return res;
}
void TTLCalcInputStream::readSuffixImpl()
{
data_part->ttl_infos = {};
for (const auto & algorithm : algorithms)
algorithm->finalize(data_part);
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Core/Block.h>
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
#include <DataStreams/ITTLAlgorithm.h>
#include <common/DateLUT.h>
namespace DB
{
class TTLCalcInputStream : public IBlockInputStream
{
public:
TTLCalcInputStream(
const BlockInputStreamPtr & input_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time,
bool force_
);
String getName() const override { return "TTL_CALC"; }
Block getHeader() const override { return header; }
protected:
Block readImpl() override;
/// Finalizes ttl infos and updates data part
void readSuffixImpl() override;
private:
std::vector<TTLAlgorithmPtr> algorithms;
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;
Poco::Logger * log;
Block header;
};
}

View File

@ -10,11 +10,13 @@ TTLColumnAlgorithm::TTLColumnAlgorithm(
bool force_,
const String & column_name_,
const ExpressionActionsPtr & default_expression_,
const String & default_column_name_)
const String & default_column_name_,
bool is_compact_part_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
, column_name(column_name_)
, default_expression(default_expression_)
, default_column_name(default_column_name_)
, is_compact_part(is_compact_part_)
{
if (!isMinTTLExpired())
{
@ -40,7 +42,7 @@ void TTLColumnAlgorithm::execute(Block & block)
return;
/// Later drop full column
if (isMaxTTLExpired())
if (isMaxTTLExpired() && !is_compact_part)
return;
auto default_column = executeExpressionAndGetColumn(default_expression, block, default_column_name);

View File

@ -17,7 +17,9 @@ public:
bool force_,
const String & column_name_,
const ExpressionActionsPtr & default_expression_,
const String & default_column_name_);
const String & default_column_name_,
bool is_compact_part_
);
void execute(Block & block) override;
void finalize(const MutableDataPartPtr & data_part) const override;
@ -28,6 +30,7 @@ private:
const String default_column_name;
bool is_fully_empty = true;
bool is_compact_part;
};
}

View File

@ -4,8 +4,15 @@ namespace DB
{
TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
const TTLDescription & description_,
const TTLUpdateField ttl_update_field_,
const String ttl_update_key_,
const TTLInfo & old_ttl_info_,
time_t current_time_,
bool force_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
, ttl_update_field(ttl_update_field_)
, ttl_update_key(ttl_update_key_)
{
}
@ -22,26 +29,37 @@ void TTLUpdateInfoAlgorithm::execute(Block & block)
}
}
TTLMoveAlgorithm::TTLMoveAlgorithm(
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
void TTLUpdateInfoAlgorithm::finalize(const MutableDataPartPtr & data_part) const
{
}
if (ttl_update_field == TTLUpdateField::RECOMPRESSION_TTL)
{
data_part->ttl_infos.recompression_ttl[ttl_update_key] = new_ttl_info;
}
else if (ttl_update_field == TTLUpdateField::MOVES_TTL)
{
data_part->ttl_infos.moves_ttl[ttl_update_key] = new_ttl_info;
}
else if (ttl_update_field == TTLUpdateField::GROUP_BY_TTL)
{
data_part->ttl_infos.group_by_ttl[ttl_update_key] = new_ttl_info;
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
}
else if (ttl_update_field == TTLUpdateField::ROWS_WHERE_TTL)
{
data_part->ttl_infos.rows_where_ttl[ttl_update_key] = new_ttl_info;
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
}
else if (ttl_update_field == TTLUpdateField::TABLE_TTL)
{
data_part->ttl_infos.table_ttl = new_ttl_info;
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
}
else if (ttl_update_field == TTLUpdateField::COLUMNS_TTL)
{
data_part->ttl_infos.columns_ttl[ttl_update_key] = new_ttl_info;
data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
}
void TTLMoveAlgorithm::finalize(const MutableDataPartPtr & data_part) const
{
data_part->ttl_infos.moves_ttl[description.result_column] = new_ttl_info;
}
TTLRecompressionAlgorithm::TTLRecompressionAlgorithm(
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
{
}
void TTLRecompressionAlgorithm::finalize(const MutableDataPartPtr & data_part) const
{
data_part->ttl_infos.recompression_ttl[description.result_column] = new_ttl_info;
}
}

View File

@ -5,28 +5,35 @@
namespace DB
{
enum class TTLUpdateField
{
COLUMNS_TTL,
TABLE_TTL,
ROWS_WHERE_TTL,
MOVES_TTL,
RECOMPRESSION_TTL,
GROUP_BY_TTL,
};
/// Calculates new ttl_info and does nothing with data.
class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
{
public:
TTLUpdateInfoAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
TTLUpdateInfoAlgorithm(
const TTLDescription & description_,
const TTLUpdateField ttl_update_field_,
const String ttl_update_key_,
const TTLInfo & old_ttl_info_,
time_t current_time_, bool force_
);
void execute(Block & block) override;
void finalize(const MutableDataPartPtr & data_part) const override = 0;
void finalize(const MutableDataPartPtr & data_part) const override;
private:
const TTLUpdateField ttl_update_field;
const String ttl_update_key;
};
class TTLMoveAlgorithm final : public TTLUpdateInfoAlgorithm
{
public:
TTLMoveAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
void finalize(const MutableDataPartPtr & data_part) const override;
};
class TTLRecompressionAlgorithm final : public TTLUpdateInfoAlgorithm
{
public:
TTLRecompressionAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
void finalize(const MutableDataPartPtr & data_part) const override;
};
}

View File

@ -27,6 +27,8 @@ public:
bool isCategorial() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool isComparable() const override { return true; }
virtual bool contains(const IDataType & rhs) const = 0;
};
@ -76,7 +78,7 @@ public:
/// Example:
/// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
/// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
bool contains(const IDataType & rhs) const;
bool contains(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;
};

View File

@ -1,4 +1,5 @@
#include <DataTypes/Serializations/ISerialization.h>
#include <Compression/CompressionFactory.h>
#include <Columns/IColumn.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>

View File

@ -155,7 +155,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
/// Split into replicas if needed.
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
auto addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306);
auto mysql_pool = mysqlxx::PoolWithFailover(mysql_database_name, addresses, mysql_user_name, mysql_user_password);
mysqlxx::PoolWithFailover mysql_pool(mysql_database_name, addresses,
mysql_user_name, mysql_user_password,
MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
context->getSettingsRef().external_storage_connect_timeout,
context->getSettingsRef().external_storage_rw_timeout);
mysql_database_settings->loadFromQueryContext(context);
mysql_database_settings->loadFromQuery(*engine_define); /// higher priority
@ -168,7 +174,6 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
MySQLClient client(remote_host_name, remote_port, mysql_user_name, mysql_user_password);
auto mysql_pool = mysqlxx::Pool(mysql_database_name, remote_host_name, mysql_user_name, mysql_user_password, remote_port);
auto materialize_mode_settings = std::make_unique<MaterializedMySQLSettings>();
if (engine_define->settings)

View File

@ -26,8 +26,8 @@ const String & getFunctionCanonicalNameIfAny(const String & name)
return FunctionFactory::instance().getCanonicalNameIfAny(name);
}
void FunctionFactory::registerFunction(const
std::string & name,
void FunctionFactory::registerFunction(
const std::string & name,
Value creator,
CaseSensitiveness case_sensitiveness)
{
@ -119,8 +119,8 @@ FunctionOverloadResolverPtr FunctionFactory::tryGetImpl(
}
FunctionOverloadResolverPtr FunctionFactory::tryGet(
const std::string & name,
ContextPtr context) const
const std::string & name,
ContextPtr context) const
{
auto impl = tryGetImpl(name, context);
return impl ? std::move(impl) : nullptr;

View File

@ -325,7 +325,7 @@ struct StringSource
};
/// Differs to StringSource by having 'offest' and 'length' in code points instead of bytes in getSlice* methods.
/// Differs to StringSource by having 'offset' and 'length' in code points instead of bytes in getSlice* methods.
/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size:
* substring:
* hello

View File

@ -1,6 +1,7 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/castTypeToEither.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
@ -95,32 +96,30 @@ private:
using Offsets = ColumnArray::Offsets;
static bool matchKeyToIndex(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexNumber(
const IColumn & data, const Offsets & offsets, bool is_key_const,
const IColumn & index, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexConst(const IColumn & data, const Offsets & offsets,
static bool matchKeyToIndexNumberConst(
const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs);
template <typename DataType>
static bool matchKeyToIndexNumber(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexString(
const IColumn & data, const Offsets & offsets, bool is_key_const,
const IColumn & index, PaddedPODArray<UInt64> & matched_idxs);
template <typename DataType>
static bool matchKeyToIndexNumberConst(const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexString(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexFixedString(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexStringConst(const IColumn & data, const Offsets & offsets,
static bool matchKeyToIndexStringConst(
const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs);
template <typename Matcher>
static void executeMatchKeyToIndex(const Offsets & offsets,
PaddedPODArray<UInt64> & matched_idxs, const Matcher & matcher);
template <typename Matcher>
static void executeMatchConstKeyToIndex(
size_t num_rows, size_t num_values,
PaddedPODArray<UInt64> & matched_idxs, const Matcher & matcher);
};
@ -759,23 +758,11 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
namespace
{
template<typename DataColumn, typename IndexColumn>
struct MatcherString
{
const ColumnString & data;
const ColumnString & index;
bool match(size_t row_data, size_t row_index) const
{
auto data_ref = data.getDataAt(row_data);
auto index_ref = index.getDataAt(row_index);
return memequalSmallAllowOverflow15(index_ref.data, index_ref.size, data_ref.data, data_ref.size);
}
};
struct MatcherFixedString
{
const ColumnFixedString & data;
const ColumnFixedString & index;
const DataColumn & data;
const IndexColumn & index;
bool match(size_t row_data, size_t row_index) const
{
@ -785,9 +772,10 @@ struct MatcherFixedString
}
};
template<typename DataColumn>
struct MatcherStringConst
{
const ColumnString & data;
const DataColumn & data;
const String & index;
bool match(size_t row_data, size_t /* row_index */) const
@ -797,23 +785,23 @@ struct MatcherStringConst
}
};
template <typename T>
template <typename DataType, typename IndexType>
struct MatcherNumber
{
const PaddedPODArray<T> & data;
const PaddedPODArray<T> & index;
const PaddedPODArray<DataType> & data;
const PaddedPODArray<IndexType> & index;
bool match(size_t row_data, size_t row_index) const
{
return data[row_data] == index[row_index];
return data[row_data] == static_cast<DataType>(index[row_index]);
}
};
template <typename T>
template <typename DataType>
struct MatcherNumberConst
{
const PaddedPODArray<T> & data;
T index;
const PaddedPODArray<DataType> & data;
DataType index;
bool match(size_t row_data, size_t /* row_index */) const
{
@ -848,147 +836,158 @@ void FunctionArrayElement::executeMatchKeyToIndex(
}
}
template <typename Matcher>
void FunctionArrayElement::executeMatchConstKeyToIndex(
size_t num_rows, size_t num_values,
PaddedPODArray<UInt64> & matched_idxs, const Matcher & matcher)
{
for (size_t i = 0; i < num_rows; ++i)
{
bool matched = false;
for (size_t j = 0; j < num_values; ++j)
{
if (matcher.match(j, i))
{
matched_idxs.push_back(j + 1);
matched = true;
break;
}
}
if (!matched)
matched_idxs.push_back(0);
}
}
template <typename F>
static bool castColumnString(const IColumn * column, F && f)
{
return castTypeToEither<ColumnString, ColumnFixedString>(column, std::forward<F>(f));
}
bool FunctionArrayElement::matchKeyToIndexStringConst(
const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs)
{
const auto * data_string = checkAndGetColumn<ColumnString>(&data);
if (!data_string)
return false;
return castColumnString(&data, [&](const auto & data_column)
{
using DataColumn = std::decay_t<decltype(data_column)>;
if (index.getType() != Field::Types::String)
return false;
MatcherStringConst matcher{*data_string, get<const String &>(index)};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
MatcherStringConst<DataColumn> matcher{data_column, get<const String &>(index)};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
});
}
bool FunctionArrayElement::matchKeyToIndexString(
const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs)
const IColumn & data, const Offsets & offsets, bool is_key_const,
const IColumn & index, PaddedPODArray<UInt64> & matched_idxs)
{
const auto * index_string = checkAndGetColumn<ColumnString>(arguments[1].column.get());
if (!index_string)
return false;
return castColumnString(&data, [&](const auto & data_column)
{
return castColumnString(&index, [&](const auto & index_column)
{
using DataColumn = std::decay_t<decltype(data_column)>;
using IndexColumn = std::decay_t<decltype(index_column)>;
const auto * data_string = checkAndGetColumn<ColumnString>(&data);
if (!data_string)
return false;
MatcherString<DataColumn, IndexColumn> matcher{data_column, index_column};
if (is_key_const)
executeMatchConstKeyToIndex(index.size(), data.size(), matched_idxs, matcher);
else
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
MatcherString matcher{*data_string, *index_string};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
return true;
});
});
}
bool FunctionArrayElement::matchKeyToIndexFixedString(
const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs)
template <typename FromType, typename ToType>
static constexpr bool areConvertibleTypes =
std::is_same_v<FromType, ToType>
|| (is_integer_v<FromType> && is_integer_v<ToType>
&& std::is_convertible_v<FromType, ToType>);
template <typename F>
static bool castColumnNumeric(const IColumn * column, F && f)
{
const auto * index_string = checkAndGetColumn<ColumnFixedString>(arguments[1].column.get());
if (!index_string)
return false;
const auto * data_string = checkAndGetColumn<ColumnFixedString>(&data);
if (!data_string)
return false;
MatcherFixedString matcher{*data_string, *index_string};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
return castTypeToEither<
ColumnVector<UInt8>,
ColumnVector<UInt16>,
ColumnVector<UInt32>,
ColumnVector<UInt64>,
ColumnVector<UInt128>,
ColumnVector<UInt256>,
ColumnVector<Int8>,
ColumnVector<Int16>,
ColumnVector<Int32>,
ColumnVector<Int64>,
ColumnVector<Int128>,
ColumnVector<Int256>,
ColumnVector<UUID>
>(column, std::forward<F>(f));
}
template <typename DataType>
bool FunctionArrayElement::matchKeyToIndexNumberConst(
const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs)
{
const auto * data_numeric = checkAndGetColumn<ColumnVector<DataType>>(&data);
if (!data_numeric)
return false;
std::optional<DataType> index_as_integer;
Field::dispatch([&](const auto & value)
return castColumnNumeric(&data, [&](const auto & data_column)
{
using FieldType = std::decay_t<decltype(value)>;
if constexpr (std::is_same_v<FieldType, DataType> || (is_integer_v<FieldType> && std::is_convertible_v<FieldType, DataType>))
index_as_integer = static_cast<DataType>(value);
}, index);
using DataType = typename std::decay_t<decltype(data_column)>::ValueType;
std::optional<DataType> index_as_integer;
if (!index_as_integer)
return false;
Field::dispatch([&](const auto & value)
{
using FieldType = std::decay_t<decltype(value)>;
if constexpr (areConvertibleTypes<FieldType, DataType>)
index_as_integer = static_cast<DataType>(value);
}, index);
MatcherNumberConst<DataType> matcher{data_numeric->getData(), *index_as_integer};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
if (!index_as_integer)
return false;
MatcherNumberConst<DataType> matcher{data_column.getData(), *index_as_integer};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
});
}
template <typename DataType>
bool FunctionArrayElement::matchKeyToIndexNumber(
const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs)
const IColumn & data, const Offsets & offsets, bool is_key_const,
const IColumn & index, PaddedPODArray<UInt64> & matched_idxs)
{
const auto * index_numeric = checkAndGetColumn<ColumnVector<DataType>>(arguments[1].column.get());
if (!index_numeric)
return false;
return castColumnNumeric(&data, [&](const auto & data_column)
{
return castColumnNumeric(&index, [&](const auto & index_column)
{
using DataType = typename std::decay_t<decltype(data_column)>::ValueType;
using IndexType = typename std::decay_t<decltype(index_column)>::ValueType;
const auto * data_numeric = checkAndGetColumn<ColumnVector<DataType>>(&data);
if (!data_numeric)
return false;
if constexpr (areConvertibleTypes<IndexType, DataType>)
{
MatcherNumber<DataType, IndexType> matcher{data_column.getData(), index_column.getData()};
if (is_key_const)
executeMatchConstKeyToIndex(index_column.size(), data_column.size(), matched_idxs, matcher);
else
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
MatcherNumber<DataType> matcher{data_numeric->getData(), index_numeric->getData()};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
}
return true;
}
bool FunctionArrayElement::matchKeyToIndex(
const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs)
{
return matchKeyToIndexNumber<UInt8>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt16>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt32>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt64>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt128>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt256>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int8>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int16>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int32>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int64>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int128>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int256>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt256>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UUID>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexString(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexFixedString(data, offsets, arguments, matched_idxs);
}
bool FunctionArrayElement::matchKeyToIndexConst(
const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs)
{
return matchKeyToIndexNumberConst<UInt8>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UInt16>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UInt32>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UInt64>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UInt128>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UInt256>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int8>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int16>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int32>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int64>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int128>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<Int256>(data, offsets, index, matched_idxs)
|| matchKeyToIndexNumberConst<UUID>(data, offsets, index, matched_idxs)
|| matchKeyToIndexStringConst(data, offsets, index, matched_idxs);
return false;
});
});
}
ColumnPtr FunctionArrayElement::executeMap(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
const ColumnMap * col_map = typeid_cast<const ColumnMap *>(arguments[0].column.get());
if (!col_map)
return nullptr;
const auto * col_map = checkAndGetColumn<ColumnMap>(arguments[0].column.get());
const auto * col_const_map = checkAndGetColumnConst<ColumnMap>(arguments[0].column.get());
assert(col_map || col_const_map);
if (col_const_map)
col_map = typeid_cast<const ColumnMap *>(&col_const_map->getDataColumn());
const auto & nested_column = col_map->getNestedColumn();
const auto & keys_data = col_map->getNestedData().getColumn(0);
@ -1000,29 +999,33 @@ ColumnPtr FunctionArrayElement::executeMap(
indices_column->reserve(input_rows_count);
auto & indices_data = assert_cast<ColumnVector<UInt64> &>(*indices_column).getData();
bool executed = false;
if (!isColumnConst(*arguments[1].column))
{
if (input_rows_count > 0 && !matchKeyToIndex(keys_data, offsets, arguments, indices_data))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types of arguments: {}, {} for function {}",
arguments[0].type->getName(), arguments[1].type->getName(), getName());
executed = matchKeyToIndexNumber(keys_data, offsets, !!col_const_map, *arguments[1].column, indices_data)
|| matchKeyToIndexString(keys_data, offsets, !!col_const_map, *arguments[1].column, indices_data);
}
else
{
Field index = (*arguments[1].column)[0];
// Get Matched key's value
if (input_rows_count > 0 && !matchKeyToIndexConst(keys_data, offsets, index, indices_data))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types of arguments: {}, {} for function {}",
arguments[0].type->getName(), arguments[1].type->getName(), getName());
executed = matchKeyToIndexNumberConst(keys_data, offsets, index, indices_data)
|| matchKeyToIndexStringConst(keys_data, offsets, index, indices_data);
}
if (!executed)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types of arguments: {}, {} for function {}",
arguments[0].type->getName(), arguments[1].type->getName(), getName());
ColumnPtr values_array = ColumnArray::create(values_data.getPtr(), nested_column.getOffsetsPtr());
if (col_const_map)
values_array = ColumnConst::create(values_array, input_rows_count);
/// Prepare arguments to call arrayElement for array with values and calculated indices at previous step.
ColumnsWithTypeAndName new_arguments =
{
{
ColumnArray::create(values_data.getPtr(), nested_column.getOffsetsPtr()),
values_array,
std::make_shared<DataTypeArray>(result_type),
""
},
@ -1066,13 +1069,14 @@ DataTypePtr FunctionArrayElement::getReturnTypeImpl(const DataTypes & arguments)
ColumnPtr FunctionArrayElement::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
/// Check nullability.
bool is_array_of_nullable = false;
const auto * col_map = checkAndGetColumn<ColumnMap>(arguments[0].column.get());
const auto * col_const_map = checkAndGetColumnConst<ColumnMap>(arguments[0].column.get());
const ColumnMap * col_map = checkAndGetColumn<ColumnMap>(arguments[0].column.get());
if (col_map)
if (col_map || col_const_map)
return executeMap(arguments, result_type, input_rows_count);
/// Check nullability.
bool is_array_of_nullable = false;
const ColumnArray * col_array = nullptr;
const ColumnArray * col_const_array = nullptr;

View File

@ -38,7 +38,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
#pragma GCC diagnostic pop
if (rc != Z_OK)
throw Exception(std::string("inflateInit2 failed: ") + zError(rc) + "; zlib version: " + ZLIB_VERSION, ErrorCodes::ZLIB_INFLATE_FAILED);
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION);
}
ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer()
@ -48,41 +48,60 @@ ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer()
bool ZlibInflatingReadBuffer::nextImpl()
{
if (eof)
return false;
if (!zstr.avail_in)
/// Need do-while loop to prevent situation, when
/// eof was not reached, but working buffer became empty (when nothing was decompressed in current iteration)
/// (this happens with compression algorithms, same idea is implemented in ZstdInflatingReadBuffer)
do
{
in->nextIfAtEnd();
zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
zstr.avail_in = in->buffer().end() - in->position();
}
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
zstr.avail_out = internal_buffer.size();
/// if we already found eof, we shouldn't do anything
if (eof)
return false;
int rc = inflate(&zstr, Z_NO_FLUSH);
in->position() = in->buffer().end() - zstr.avail_in;
working_buffer.resize(internal_buffer.size() - zstr.avail_out);
if (rc == Z_STREAM_END)
{
if (in->eof())
/// if there is no available bytes in zstr, move ptr to next available data
if (!zstr.avail_in)
{
eof = true;
return !working_buffer.empty();
in->nextIfAtEnd();
zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
zstr.avail_in = in->buffer().end() - in->position();
}
else
{
rc = inflateReset(&zstr);
if (rc != Z_OK)
throw Exception(std::string("inflateReset failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED);
return true;
}
}
if (rc != Z_OK)
throw Exception(std::string("inflate failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED);
/// init output bytes (place, where decompressed data will be)
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
zstr.avail_out = internal_buffer.size();
int rc = inflate(&zstr, Z_NO_FLUSH);
/// move in stream on place, where reading stopped
in->position() = in->buffer().end() - zstr.avail_in;
/// change size of working buffer (it's size equal to internal_buffer size without unused uncompressed values)
working_buffer.resize(internal_buffer.size() - zstr.avail_out);
/// If end was reached, it can be end of file or end of part (for example, chunk)
if (rc == Z_STREAM_END)
{
/// if it is end of file, remember this and return
/// * true if we can work with working buffer (we still have something to read, so next must return true)
/// * false if there is no data in working buffer
if (in->eof())
{
eof = true;
return !working_buffer.empty();
}
/// If it is not end of file, we need to reset zstr and return true, because we still have some data to read
else
{
rc = inflateReset(&zstr);
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
return true;
}
}
/// If it is not end and not OK, something went wrong, throw exception
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
}
while (working_buffer.empty());
/// if code reach this section, working buffer is not empty, so we have some data to process
return true;
}

View File

@ -14,7 +14,7 @@
#include <Common/Throttler.h>
#include <Common/thread_local_rng.h>
#include <Common/FieldVisitorToString.h>
#include <Coordination/KeeperStorageDispatcher.h>
#include <Coordination/KeeperDispatcher.h>
#include <Compression/ICompressionCodec.h>
#include <Core/BackgroundSchedulePool.h>
#include <Formats/FormatFactory.h>
@ -146,7 +146,7 @@ struct ContextSharedPart
#if USE_NURAFT
mutable std::mutex keeper_storage_dispatcher_mutex;
mutable std::shared_ptr<KeeperStorageDispatcher> keeper_storage_dispatcher;
mutable std::shared_ptr<KeeperDispatcher> keeper_storage_dispatcher;
#endif
mutable std::mutex auxiliary_zookeepers_mutex;
mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients.
@ -1649,7 +1649,7 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded()
zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log);
}
void Context::initializeKeeperStorageDispatcher() const
void Context::initializeKeeperDispatcher() const
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);
@ -1660,14 +1660,14 @@ void Context::initializeKeeperStorageDispatcher() const
const auto & config = getConfigRef();
if (config.has("keeper_server"))
{
shared->keeper_storage_dispatcher = std::make_shared<KeeperStorageDispatcher>();
shared->keeper_storage_dispatcher = std::make_shared<KeeperDispatcher>();
shared->keeper_storage_dispatcher->initialize(config, getApplicationType() == ApplicationType::KEEPER);
}
#endif
}
#if USE_NURAFT
std::shared_ptr<KeeperStorageDispatcher> & Context::getKeeperStorageDispatcher() const
std::shared_ptr<KeeperDispatcher> & Context::getKeeperDispatcher() const
{
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);
if (!shared->keeper_storage_dispatcher)
@ -1677,7 +1677,7 @@ std::shared_ptr<KeeperStorageDispatcher> & Context::getKeeperStorageDispatcher()
}
#endif
void Context::shutdownKeeperStorageDispatcher() const
void Context::shutdownKeeperDispatcher() const
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);

Some files were not shown because too many files have changed in this diff Show More