diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a5a6293f7c..de517b1b589 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,12 +127,13 @@ if (USE_STATIC_LIBRARIES) list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) endif () -# Implies ${WITH_COVERAGE} option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF) if (ENABLE_FUZZING) + # Also set WITH_COVERAGE=1 for a better fuzzing process. + # By default it is disabled, because fuzzers are built in CI together with ClickHouse itself, + # and we don't want to enable coverage for it. message (STATUS "Fuzzing instrumentation enabled") - set (WITH_COVERAGE ON) set (FUZZER "libfuzzer") endif() diff --git a/README.md b/README.md index 178547ea523..496a6357f44 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events -* [SF Bay Area ClickHouse August Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/279109379/) on 25 August 2021. diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index a429273fa1a..dad7aa938d7 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -1,4 +1,5 @@ #include <sys/auxv.h> +#include "atomic.h" #include <unistd.h> // __environ #include <errno.h> @@ -17,18 +18,7 @@ static size_t __find_auxv(unsigned long type) return (size_t) -1; } -__attribute__((constructor)) static void __auxv_init() -{ - size_t i; - for (i = 0; __environ[i]; i++); - __auxv = (unsigned long *) (__environ + i + 1); - - size_t secure_idx = __find_auxv(AT_SECURE); - if (secure_idx != ((size_t) -1)) - __auxv_secure = __auxv[secure_idx]; -} - -unsigned long getauxval(unsigned long type) +unsigned long __getauxval(unsigned long type) { if (type == AT_SECURE) return __auxv_secure; @@ -43,3 +33,38 @@ unsigned long getauxval(unsigned long type) errno = ENOENT; return 0; } + +static void * volatile getauxval_func; + +static unsigned long __auxv_init(unsigned long type) +{ + if (!__environ) + { + // __environ is not initialized yet, so we can't initialize __auxv right now. + // That normally occurs only when getauxval() is called from some sanitizer's internal code. + errno = ENOENT; + return 0; + } + + // Initialize __auxv and __auxv_secure. + size_t i; + for (i = 0; __environ[i]; i++); + __auxv = (unsigned long *) (__environ + i + 1); + + size_t secure_idx = __find_auxv(AT_SECURE); + if (secure_idx != ((size_t) -1)) + __auxv_secure = __auxv[secure_idx]; + + // Now that we've initialized __auxv, the next call to getauxval() will only call __getauxval(). + a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval); + + return __getauxval(type); +} + +// The first time, getauxval() will call __auxv_init(). 
+static void * volatile getauxval_func = (void *)__auxv_init; + +unsigned long getauxval(unsigned long type) +{ + return ((unsigned long (*)(unsigned long))getauxval_func)(type); +} diff --git a/base/mysqlxx/PoolWithFailover.cpp b/base/mysqlxx/PoolWithFailover.cpp index e317ab7f228..f449708dc46 100644 --- a/base/mysqlxx/PoolWithFailover.cpp +++ b/base/mysqlxx/PoolWithFailover.cpp @@ -80,7 +80,9 @@ PoolWithFailover::PoolWithFailover( const std::string & password, unsigned default_connections_, unsigned max_connections_, - size_t max_tries_) + size_t max_tries_, + size_t connect_timeout, + size_t rw_timeout) : max_tries(max_tries_) , shareable(false) { @@ -90,8 +92,8 @@ PoolWithFailover::PoolWithFailover( replicas_by_priority[0].emplace_back(std::make_shared<Pool>(database, host, user, password, port, /* socket_ = */ "", - MYSQLXX_DEFAULT_TIMEOUT, - MYSQLXX_DEFAULT_RW_TIMEOUT, + connect_timeout, + rw_timeout, default_connections_, max_connections_)); } @@ -130,7 +132,6 @@ PoolWithFailover::Entry PoolWithFailover::get() for (size_t try_no = 0; try_no < max_tries; ++try_no) { full_pool = nullptr; - for (auto & priority_replicas : replicas_by_priority) { Replicas & replicas = priority_replicas.second; diff --git a/base/mysqlxx/PoolWithFailover.h b/base/mysqlxx/PoolWithFailover.h index 1c7a63e76c0..e3c5ebe76e1 100644 --- a/base/mysqlxx/PoolWithFailover.h +++ b/base/mysqlxx/PoolWithFailover.h @@ -117,7 +117,9 @@ namespace mysqlxx const std::string & password, unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + size_t connect_timeout = MYSQLXX_DEFAULT_TIMEOUT, + size_t rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT); PoolWithFailover(const PoolWithFailover & other); diff --git a/contrib/libpqxx-cmake/CMakeLists.txt b/contrib/libpqxx-cmake/CMakeLists.txt index ae35538ccf4..65fa94cb3fd 100644 --- a/contrib/libpqxx-cmake/CMakeLists.txt +++ b/contrib/libpqxx-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ set (SRCS "${LIBRARY_DIR}/src/transaction.cxx" "${LIBRARY_DIR}/src/transaction_base.cxx" "${LIBRARY_DIR}/src/row.cxx" + "${LIBRARY_DIR}/src/params.cxx" "${LIBRARY_DIR}/src/util.cxx" "${LIBRARY_DIR}/src/version.cxx" ) @@ -31,6 +32,7 @@ set (SRCS # conflicts with all includes of . 
set (HDRS "${LIBRARY_DIR}/include/pqxx/array.hxx" + "${LIBRARY_DIR}/include/pqxx/params.hxx" "${LIBRARY_DIR}/include/pqxx/binarystring.hxx" "${LIBRARY_DIR}/include/pqxx/composite.hxx" "${LIBRARY_DIR}/include/pqxx/connection.hxx" @@ -75,4 +77,3 @@ set(CM_CONFIG_PQ "${LIBRARY_DIR}/include/pqxx/config-internal-libpq.h") configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_INT}" @ONLY) configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PUB}" @ONLY) configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PQ}" @ONLY) - diff --git a/contrib/libunwind b/contrib/libunwind index 6b816d2fba3..c4ea9848a69 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 6b816d2fba3991f8fd6aaec17d92f68947eab667 +Subproject commit c4ea9848a697747dfa35325af9b3452f30841685 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index d6614bbb9e2..b9900e34bf1 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -83,6 +83,16 @@ then mv "$COMBINED_OUTPUT.tgz" /output fi +# Also build fuzzers if any sanitizer is specified +if [ -n "$SANITIZER" ] +then + # Currently we are in the build/build_docker directory + ../docker/packager/other/fuzzer.sh +fi + +ccache --show-config ||: +ccache --show-stats ||: + if [ "${CCACHE_DEBUG:-}" == "1" ] then find . -name '*.ccache-*' -print0 \ @@ -95,4 +105,3 @@ then # files in place, and will fail because this directory is not writable. tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" fi - diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 4e14574b738..46f6404363d 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -23,12 +23,24 @@ then echo "Place $BINARY_OUTPUT to output" mkdir /output/binary ||: # if exists mv /build/obj-*/programs/clickhouse* /output/binary + if [ "$BINARY_OUTPUT" = "tests" ] then mv /build/obj-*/src/unit_tests_dbms /output/binary fi fi +# Also build fuzzers if any sanitizer is specified +if [ -n "$SANITIZER" ] +then + # The script assumes that we are in the build directory. + mkdir -p build/build_docker + cd build/build_docker + # Launch the build script + ../docker/packager/other/fuzzer.sh + cd ../.. +fi + ccache --show-config ||: ccache --show-stats ||: diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh new file mode 100755 index 00000000000..1a8b80c3f77 --- /dev/null +++ b/docker/packager/other/fuzzer.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# This script is responsible for building all fuzzers and copying them to the output +# directory as an archive. +# It assumes that we are in the build directory. + +set -x -e + +printenv + +# Delete the previous cache, because we add new flags: -DENABLE_FUZZING=1 and -DFUZZER=libfuzzer +rm -f CMakeCache.txt +read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" +# Hopefully most of the files will already be in the cache, so we just link the new executables +cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \ + -DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \ + -DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \ + -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \ + -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. 
+ +FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ') + +mkdir -p /output/fuzzers +for FUZZER_TARGET in $FUZZER_TARGETS +do + # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. + ninja $NINJA_FLAGS $FUZZER_TARGET + # Find this binary in build directory and strip it + FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET") + strip --strip-unneeded "$FUZZER_PATH" + mv "$FUZZER_PATH" /output/fuzzers +done + +tar -zcvf /output/fuzzers.tar.gz /output/fuzzers +rm -rf /output/fuzzers diff --git a/docker/packager/packager b/docker/packager/packager index 95b7fcd8568..673878bce43 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -105,6 +105,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if image_type == "deb" or image_type == "unbundled": result.append("DEB_CC={}".format(cc)) result.append("DEB_CXX={}".format(cxx)) + # For building fuzzers + result.append("CC={}".format(cc)) + result.append("CXX={}".format(cxx)) elif image_type == "binary": result.append("CC={}".format(cc)) result.append("CXX={}".format(cxx)) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index c4493de477c..00af261f6c8 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -396,6 +396,9 @@ function run_tests # needs s3 01944_insert_partition_by + + # depends on Go + 02013_zlib_read_after_eof ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 18684145636..9a96ac1dfa7 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -16,6 +16,8 @@ RUN apt-get update \ p7zip-full \ parallel \ psmisc \ + python3 \ + python3-pip \ rsync \ tree \ tzdata \ @@ -25,6 +27,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +RUN pip3 install Jinja2 + COPY * / SHELL ["/bin/bash", "-c"] diff --git a/docker/test/fuzzer/generate-test-j2.py b/docker/test/fuzzer/generate-test-j2.py new file mode 100755 index 00000000000..bcc1bf6bc84 --- /dev/null +++ b/docker/test/fuzzer/generate-test-j2.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +from argparse import ArgumentParser +import os +import jinja2 + + +def removesuffix(text, suffix): + """ + Added in python 3.9 + https://www.python.org/dev/peps/pep-0616/ + """ + if suffix and text.endswith(suffix): + return text[:-len(suffix)] + else: + return text[:] + + +def render_test_template(j2env, suite_dir, test_name): + """ + Render template for test and reference file if needed + """ + + test_base_name = removesuffix(test_name, ".sql.j2") + + reference_file_name = test_base_name + ".reference.j2" + reference_file_path = os.path.join(suite_dir, reference_file_name) + if os.path.isfile(reference_file_path): + tpl = j2env.get_template(reference_file_name) + tpl.stream().dump(os.path.join(suite_dir, test_base_name) + ".gen.reference") + + if test_name.endswith(".sql.j2"): + tpl = j2env.get_template(test_name) + generated_test_name = test_base_name + ".gen.sql" + tpl.stream().dump(os.path.join(suite_dir, generated_test_name)) + return generated_test_name + + return test_name + + +def main(args): + suite_dir = args.path + + print(f"Scanning {suite_dir} directory...") + + j2env = jinja2.Environment( + loader=jinja2.FileSystemLoader(suite_dir), + keep_trailing_newline=True, + ) + + test_names = os.listdir(suite_dir) + for test_name in test_names: + if not test_name.endswith(".sql.j2"): + 
continue + new_name = render_test_template(j2env, suite_dir, test_name) + print(f"File {new_name} generated") + + +if __name__ == "__main__": + parser = ArgumentParser(description="Jinja2 test generator") + parser.add_argument("-p", "--path", help="Path to test dir", required=True) + main(parser.parse_args()) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 44183a50ae5..9a389edc5b2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -71,12 +71,12 @@ function watchdog kill -9 -- $fuzzer_pid ||: } -function filter_exists +function filter_exists_and_template { local path for path in "$@"; do if [ -e "$path" ]; then - echo "$path" + echo "$path" | sed 's/\.sql\.j2$/.gen.sql/' else echo "'$path' does not exists" >&2 fi @@ -85,11 +85,13 @@ function fuzz { + /generate-test-j2.py --path ch/tests/queries/0_stateless + # Obtain the list of newly added tests. They will be fuzzed in more extreme way than other tests. # Don't overwrite the NEW_TESTS_OPT so that it can be set from the environment. - NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\)$!ch/\1!p' ci-changed-files.txt | sort -R)" + NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\(\.j2\)\?\)$!ch/\1!p' ci-changed-files.txt | sort -R)" # ci-changed-files.txt contains also files that has been deleted/renamed, filter them out. - NEW_TESTS="$(filter_exists $NEW_TESTS)" + NEW_TESTS="$(filter_exists_and_template $NEW_TESTS)" if [[ -n "$NEW_TESTS" ]] then NEW_TESTS_OPT="${NEW_TESTS_OPT:---interleave-queries-file ${NEW_TESTS}}" diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index f5fa86a6f33..39c8a2e5358 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update -y \ python3-pip \ qemu-user-static \ sudo \ + # golang version 1.13 on Ubuntu 20 is enough for tests + golang \ telnet \ tree \ unixodbc \ diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 9daf6148324..803212330dd 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -5,6 +5,86 @@ toc_title: Third-Party Libraries Used # Third-Party Libraries Used {#third-party-libraries-used} +The list of third-party libraries: + +| Library name | License type | +|:-|:-| +| abseil-cpp | [Apache](https://github.com/ClickHouse-Extras/abseil-cpp/blob/4f3b686f86c3ebaba7e4e926e62a79cb1c659a54/LICENSE) | +| AMQP-CPP | [Apache](https://github.com/ClickHouse-Extras/AMQP-CPP/blob/1a6c51f4ac51ac56610fa95081bd2f349911375a/LICENSE) | +| arrow | [Apache](https://github.com/ClickHouse-Extras/arrow/blob/078e21bad344747b7656ef2d7a4f7410a0a303eb/LICENSE.txt) | +| avro | [Apache](https://github.com/ClickHouse-Extras/avro/blob/e43c46e87fd32eafdc09471e95344555454c5ef8/LICENSE.txt) | +| aws | [Apache](https://github.com/ClickHouse-Extras/aws-sdk-cpp/blob/7d48b2c8193679cc4516e5bd68ae4a64b94dae7d/LICENSE.txt) | +| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) | +| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) | +| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) | +| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) | +| 
boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) | +| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) | +| brotli | [MIT](https://github.com/google/brotli/blob/63be8a99401992075c23e99f7c84de1c653e39e2/LICENSE) | +| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/a00ccd91b3746ef2ab51d40fe3265829949d1ace/LICENSE) | +| cassandra | [Apache](https://github.com/ClickHouse-Extras/cpp-driver/blob/eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1/LICENSE.txt) | +| cctz | [Apache](https://github.com/ClickHouse-Extras/cctz/blob/c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8/LICENSE.txt) | +| cityhash102 | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/cityhash102/COPYING) | +| cppkafka | [BSD 2-clause](https://github.com/mfontanini/cppkafka/blob/5a119f689f8a4d90d10a9635e7ee2bee5c127de1/LICENSE) | +| croaring | [Apache](https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/LICENSE) | +| curl | [Apache](https://github.com/curl/curl/blob/3b8bbbbd1609c638a3d3d0acb148a33dedb67be3/docs/LICENSE-MIXING.md) | +| cyrus-sasl | [BSD 2-clause](https://github.com/ClickHouse-Extras/cyrus-sasl/blob/e6466edfd638cc5073debe941c53345b18a09512/COPYING) | +| double-conversion | [BSD 3-clause](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | +| dragonbox | [Apache](https://github.com/ClickHouse-Extras/dragonbox/blob/923705af6fd953aa948fc175f6020b15f7359838/LICENSE-Apache2-LLVM) | +| fast_float | [Apache](https://github.com/fastfloat/fast_float/blob/7eae925b51fd0f570ccd5c880c12e3e27a23b86f/LICENSE) | +| fastops | [MIT](https://github.com/ClickHouse-Extras/fastops/blob/88752a5e03cf34639a4a37a4b41d8b463fffd2b5/LICENSE) | +| flatbuffers | [Apache](https://github.com/ClickHouse-Extras/flatbuffers/blob/eb3f827948241ce0e701516f16cd67324802bce9/LICENSE.txt) | +| fmtlib | [Unknown](https://github.com/fmtlib/fmt/blob/c108ee1d590089ccf642fc85652b845924067af2/LICENSE.rst) | +| gcem | [Apache](https://github.com/kthohr/gcem/blob/8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00/LICENSE) | +| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) | +| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | +| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | +| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) | +| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) | +| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) | +| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | +| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) | +| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) | +| libcpuid | [BSD 2-clause](https://github.com/ClickHouse-Extras/libcpuid/blob/8db3b8d2d32d22437f063ce692a1b9bb15e42d18/COPYING) | +| libcxx | 
[Apache](https://github.com/ClickHouse-Extras/libcxx/blob/2fa892f69acbaa40f8a18c6484854a6183a34482/LICENSE.TXT) | +| libcxxabi | [Apache](https://github.com/ClickHouse-Extras/libcxxabi/blob/df8f1e727dbc9e2bedf2282096fa189dc3fe0076/LICENSE.TXT) | +| libdivide | [zLib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | +| libfarmhash | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libfarmhash/COPYING) | +| libgsasl | [LGPL](https://github.com/ClickHouse-Extras/libgsasl/blob/383ee28e82f69fa16ed43b48bd9c8ee5b313ab84/LICENSE) | +| libhdfs3 | [Apache](https://github.com/ClickHouse-Extras/libhdfs3/blob/095b9d48b400abb72d967cb0539af13b1e3d90cf/LICENSE.txt) | +| libmetrohash | [Apache](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | +| libpq | [Unknown](https://github.com/ClickHouse-Extras/libpq/blob/e071ea570f8985aa00e34f5b9d50a3cfe666327e/COPYRIGHT) | +| libpqxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/libpqxx/blob/357608d11b7a1961c3fb7db2ef9a5dbb2e87da77/COPYING) | +| librdkafka | [MIT](https://github.com/ClickHouse-Extras/librdkafka/blob/b8554f1682062c85ba519eb54ef2f90e02b812cb/LICENSE.murmur2) | +| libunwind | [Apache](https://github.com/ClickHouse-Extras/libunwind/blob/6b816d2fba3991f8fd6aaec17d92f68947eab667/LICENSE.TXT) | +| libuv | [BSD](https://github.com/ClickHouse-Extras/libuv/blob/e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab/LICENSE) | +| llvm | [Apache](https://github.com/ClickHouse-Extras/llvm/blob/e5751459412bce1391fb7a2e9bbc01e131bf72f1/llvm/LICENSE.TXT) | +| lz4 | [BSD](https://github.com/lz4/lz4/blob/f39b79fb02962a1cd880bbdecb6dffba4f754a11/LICENSE) | +| mariadb-connector-c | [LGPL](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/5f4034a3a6376416504f17186c55fe401c6d8e5e/COPYING.LIB) | +| miniselect | [Boost](https://github.com/danlark1/miniselect/blob/be0af6bd0b6eb044d1acc4f754b229972d99903a/LICENSE_1_0.txt) | +| msgpack-c | [Boost](https://github.com/msgpack/msgpack-c/blob/46684265d50b5d1b062d4c5c428ba08462844b1d/LICENSE_1_0.txt) | +| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | +| NuRaft | [Apache](https://github.com/ClickHouse-Extras/NuRaft/blob/7ecb16844af6a9c283ad432d85ecc2e7d1544676/LICENSE) | +| openldap | [Unknown](https://github.com/ClickHouse-Extras/openldap/blob/0208811b6043ca06fda8631a5e473df1ec515ccb/LICENSE) | +| orc | [Apache](https://github.com/ClickHouse-Extras/orc/blob/0a936f6bbdb9303308973073f8623b5a8d82eae1/LICENSE) | +| poco | [Boost](https://github.com/ClickHouse-Extras/poco/blob/7351c4691b5d401f59e3959adfc5b4fa263b32da/LICENSE) | +| protobuf | [BSD 3-clause](https://github.com/ClickHouse-Extras/protobuf/blob/75601841d172c73ae6bf4ce8121f42b875cdbabd/LICENSE) | +| rapidjson | [MIT](https://github.com/ClickHouse-Extras/rapidjson/blob/c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa/bin/jsonschema/LICENSE) | +| re2 | [BSD 3-clause](https://github.com/google/re2/blob/13ebb377c6ad763ca61d12dd6f88b1126bd0b911/LICENSE) | +| replxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/replxx/blob/c81be6c68b146f15f2096b7ef80e3f21fe27004c/LICENSE.md) | +| rocksdb | [BSD 3-clause](https://github.com/ClickHouse-Extras/rocksdb/blob/b6480c69bf3ab6e298e0d019a07fd4f69029b26a/LICENSE.leveldb) | +| s2geometry | [Apache](https://github.com/ClickHouse-Extras/s2geometry/blob/20ea540d81f4575a3fc0aea585aac611bcd03ede/LICENSE) | +| sentry-native | 
[MIT](https://github.com/ClickHouse-Extras/sentry-native/blob/94644e92f0a3ff14bd35ed902a8622a2d15f7be4/LICENSE) | +| simdjson | [Apache](https://github.com/simdjson/simdjson/blob/8df32cea3359cb30120795da6020b3b73da01d38/LICENSE) | +| snappy | [Public Domain](https://github.com/google/snappy/blob/3f194acb57e0487531c96b97af61dcbd025a78a3/COPYING) | +| sparsehash-c11 | [BSD 3-clause](https://github.com/sparsehash/sparsehash-c11/blob/cf0bffaa456f23bc4174462a789b90f8b6f5f42f/LICENSE) | +| stats | [Apache](https://github.com/kthohr/stats/blob/b6dd459c10a88c7ea04693c007e9e35820c5d9ad/LICENSE) | +| thrift | [Apache](https://github.com/apache/thrift/blob/010ccf0a0c7023fea0f6bf4e4078ebdff7e61982/LICENSE) | +| unixodbc | [LGPL](https://github.com/ClickHouse-Extras/UnixODBC/blob/b0ad30f7f6289c12b76f04bfb9d466374bb32168/COPYING) | +| xz | [Public Domain](https://github.com/xz-mirror/xz/blob/869b9d1b4edd6df07f819d360d306251f8147353/COPYING) | +| zlib-ng | [zLib](https://github.com/ClickHouse-Extras/zlib-ng/blob/6a5e93b9007782115f7f7e5235dedc81c4f1facb/LICENSE.md) | +| zstd | [BSD](https://github.com/facebook/zstd/blob/a488ba114ec17ea1054b9057c26a046fc122b3b6/LICENSE) | + The list of third-party libraries can be obtained by the following query: ``` sql @@ -13,84 +93,6 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li [Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) -| library_name | license_type | license_path | -|:-|:-|:-| -| abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE | -| AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE | -| arrow | Apache | /contrib/arrow/LICENSE.txt | -| avro | Apache | /contrib/avro/LICENSE.txt | -| aws | Apache | /contrib/aws/LICENSE.txt | -| aws-c-common | Apache | /contrib/aws-c-common/LICENSE | -| aws-c-event-stream | Apache | /contrib/aws-c-event-stream/LICENSE | -| aws-checksums | Apache | /contrib/aws-checksums/LICENSE | -| base64 | BSD 2-clause | /contrib/base64/LICENSE | -| boost | Boost | /contrib/boost/LICENSE_1_0.txt | -| boringssl | BSD | /contrib/boringssl/LICENSE | -| brotli | MIT | /contrib/brotli/LICENSE | -| capnproto | MIT | /contrib/capnproto/LICENSE | -| cassandra | Apache | /contrib/cassandra/LICENSE.txt | -| cctz | Apache | /contrib/cctz/LICENSE.txt | -| cityhash102 | MIT | /contrib/cityhash102/COPYING | -| cppkafka | BSD 2-clause | /contrib/cppkafka/LICENSE | -| croaring | Apache | /contrib/croaring/LICENSE | -| curl | Apache | /contrib/curl/docs/LICENSE-MIXING.md | -| cyrus-sasl | BSD 2-clause | /contrib/cyrus-sasl/COPYING | -| double-conversion | BSD 3-clause | /contrib/double-conversion/LICENSE | -| dragonbox | Apache | /contrib/dragonbox/LICENSE-Apache2-LLVM | -| fast_float | Apache | /contrib/fast_float/LICENSE | -| fastops | MIT | /contrib/fastops/LICENSE | -| flatbuffers | Apache | /contrib/flatbuffers/LICENSE.txt | -| fmtlib | Unknown | /contrib/fmtlib/LICENSE.rst | -| gcem | Apache | /contrib/gcem/LICENSE | -| googletest | BSD 3-clause | /contrib/googletest/LICENSE | -| grpc | Apache | /contrib/grpc/LICENSE | -| h3 | Apache | /contrib/h3/LICENSE | -| hyperscan | Boost | /contrib/hyperscan/LICENSE | -| icu | Public Domain | /contrib/icu/icu4c/LICENSE | -| icudata | Public Domain | /contrib/icudata/LICENSE | -| jemalloc | BSD 2-clause | /contrib/jemalloc/COPYING | -| krb5 | MIT | /contrib/krb5/src/lib/gssapi/LICENSE | -| libc-headers | LGPL | 
/contrib/libc-headers/LICENSE | -| libcpuid | BSD 2-clause | /contrib/libcpuid/COPYING | -| libcxx | Apache | /contrib/libcxx/LICENSE.TXT | -| libcxxabi | Apache | /contrib/libcxxabi/LICENSE.TXT | -| libdivide | zLib | /contrib/libdivide/LICENSE.txt | -| libfarmhash | MIT | /contrib/libfarmhash/COPYING | -| libgsasl | LGPL | /contrib/libgsasl/LICENSE | -| libhdfs3 | Apache | /contrib/libhdfs3/LICENSE.txt | -| libmetrohash | Apache | /contrib/libmetrohash/LICENSE | -| libpq | Unknown | /contrib/libpq/COPYRIGHT | -| libpqxx | BSD 3-clause | /contrib/libpqxx/COPYING | -| librdkafka | MIT | /contrib/librdkafka/LICENSE.murmur2 | -| libunwind | Apache | /contrib/libunwind/LICENSE.TXT | -| libuv | BSD | /contrib/libuv/LICENSE | -| llvm | Apache | /contrib/llvm/llvm/LICENSE.TXT | -| lz4 | BSD | /contrib/lz4/LICENSE | -| mariadb-connector-c | LGPL | /contrib/mariadb-connector-c/COPYING.LIB | -| miniselect | Boost | /contrib/miniselect/LICENSE_1_0.txt | -| msgpack-c | Boost | /contrib/msgpack-c/LICENSE_1_0.txt | -| murmurhash | Public Domain | /contrib/murmurhash/LICENSE | -| NuRaft | Apache | /contrib/NuRaft/LICENSE | -| openldap | Unknown | /contrib/openldap/LICENSE | -| orc | Apache | /contrib/orc/LICENSE | -| poco | Boost | /contrib/poco/LICENSE | -| protobuf | BSD 3-clause | /contrib/protobuf/LICENSE | -| rapidjson | MIT | /contrib/rapidjson/bin/jsonschema/LICENSE | -| re2 | BSD 3-clause | /contrib/re2/LICENSE | -| replxx | BSD 3-clause | /contrib/replxx/LICENSE.md | -| rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb | -| s2geometry | Apache | /contrib/s2geometry/LICENSE | -| sentry-native | MIT | /contrib/sentry-native/LICENSE | -| simdjson | Apache | /contrib/simdjson/LICENSE | -| snappy | Public Domain | /contrib/snappy/COPYING | -| sparsehash-c11 | BSD 3-clause | /contrib/sparsehash-c11/LICENSE | -| stats | Apache | /contrib/stats/LICENSE | -| thrift | Apache | /contrib/thrift/LICENSE | -| unixodbc | LGPL | /contrib/unixodbc/COPYING | -| xz | Public Domain | /contrib/xz/COPYING | -| zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md | -| zstd | BSD | /contrib/zstd/LICENSE | - ## Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries} 1. All external third-party code should reside in the dedicated directories under `contrib` directory of ClickHouse repo. Prefer Git submodules, when available. diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 4c763153a36..53ab3f5088c 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -34,6 +34,7 @@ The table structure can differ from the original PostgreSQL table structure: - `user` — PostgreSQL user. - `password` — User password. - `schema` — Non-default table schema. Optional. +- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. 
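As an illustration of the optional engine arguments documented above, a table definition that passes both a schema and an `ON CONFLICT` clause might look like the following sketch (the host, database, and column names are invented for the example):

```sql
CREATE TABLE pg_events
(
    id UInt64,
    message String
)
ENGINE = PostgreSQL('localhost:5432', 'test_db', 'events', 'pg_user', 'pg_password', 'public', 'ON CONFLICT DO NOTHING');
```

With such a definition every `INSERT` is sent to PostgreSQL with the given conflict clause appended, which is why the note above warns that insertion becomes less efficient.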
## Implementation Details {#implementation-details} diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 94ef48d8d72..ce41b288f0a 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -390,20 +390,27 @@ Functions with a constant argument that is less than ngram size can’t be used - `s != 1` - `NOT startsWith(s, 'test')` -### Projections {#projections} -Projections are like materialized views but defined in part-level. It provides consistency guarantees along with automatic usage in queries. +## Projections {#projections} +Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined at the part level. They provide consistency guarantees along with automatic usage in queries. -#### Query {#projection-query} -A projection query is what defines a projection. It has the following grammar: +Projections are an experimental feature. To enable them you must set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting. -`SELECT [GROUP BY] [ORDER BY]` +Projections are not supported in `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier. -It implicitly selects data from the parent table. +### Projection Query {#projection-query} +A projection query is what defines a projection. It implicitly selects data from the parent table. +**Syntax** -#### Storage {#projection-storage} -Projections are stored inside the part directory. It's similar to an index but contains a subdirectory that stores an anonymous MergeTree table's part. The table is induced by the definition query of the projection. If there is a GROUP BY clause, the underlying storage engine becomes AggregatedMergeTree, and all aggregate functions are converted to AggregateFunction. If there is an ORDER BY clause, the MergeTree table will use it as its primary key expression. During the merge process, the projection part will be merged via its storage's merge routine. The checksum of the parent table's part will combine the projection's part. Other maintenance jobs are similar to skip indices. +```sql +SELECT [GROUP BY] [ORDER BY] +``` -#### Query Analysis {#projection-query-analysis} +Projections can be modified or dropped with the [ALTER](../../../sql-reference/statements/alter/projection.md) statement. + +### Projection Storage {#projection-storage} +Projections are stored inside the part directory. It's similar to an index but contains a subdirectory that stores an anonymous `MergeTree` table's part. The table is induced by the definition query of the projection. If there is a `GROUP BY` clause, the underlying storage engine becomes [AggregatingMergeTree](aggregatingmergetree.md), and all aggregate functions are converted to `AggregateFunction`. If there is an `ORDER BY` clause, the `MergeTree` table uses it as its primary key expression. During the merge process the projection part is merged via its storage's merge routine. The checksum of the parent table's part is combined with the projection's part. Other maintenance jobs are similar to skip indices. + +### Query Analysis {#projection-query-analysis} 1. 
Check if the projection can be used to answer the given query, that is, it generates the same answer as querying the base table. 2. Select the best feasible match, which contains the least granules to read. 3. The query pipeline which uses projections will be different from the one that uses the original parts. If the projection is absent in some parts, we can add the pipeline to "project" it on the fly. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 8457ea41857..70b7d59b037 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -141,7 +141,7 @@ Since version 20.5, `clickhouse-client` has automatic syntax highlighting (alway Example of a config file: -``` xml +```xml <config> <user>username</user> <password>password</password> @@ -149,4 +149,30 @@ Example of a config file: ``` -[Original article](https://clickhouse.tech/docs/en/interfaces/cli/) +### Query ID Format {#query-id-format} + +In interactive mode `clickhouse-client` shows the query ID for every query. By default, the ID is formatted like this: + +```sql +Query id: 927f137d-00f1-4175-8914-0dd066365e96 +``` + +A custom format may be specified in a configuration file inside a `query_id_formats` tag. The `{query_id}` placeholder in the format string is replaced with the ID of a query. Several format strings are allowed inside the tag. +This feature can be used to generate URLs to facilitate profiling of queries. + +**Example** + +```xml +<config> + <query_id_formats> + <speedscope>http://speedscope-host/#profileURL=qp%3Fid%3D{query_id}</speedscope> + </query_id_formats> +</config> +``` + +If the configuration above is applied, the ID of a query is shown in the following format: + +``` text +speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d +``` + diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 44f9353063c..aafad0220bd 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -7,7 +7,7 @@ toc_title: Configuration Files ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note, that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have main configs as `config.xml` and `users.xml` and write additional files in `config.d` and `users.d` directories in `.yaml`. -All the configuration files should be in XML or YAML formats. All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically. +All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically. ## Override {#override} @@ -32,7 +32,7 @@ You can also declare attributes as coming from environment variables by using `f ## Substitution {#substitution} -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. 
If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)). +The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). If you want to replace an entire element with a substitution use `include` as element name. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index a0a8a220ad6..a1c7d1aab32 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3435,3 +3435,25 @@ Possible values: - 1 — The table is automatically updated in the background, when schema changes are detected. Default value: `0`. + +## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} + +Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries. + +Possible values: + +- 0 — Projection optimization disabled. +- 1 — Projection optimization enabled. + +Default value: `0`. + +## force_optimize_projection {#force-optimize-projection} + +Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting). + +Possible values: + +- 0 — Projection optimization is not obligatory. +- 1 — Projection optimization is obligatory. + +Default value: `0`. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 6c24ffdba57..3adedd0ae70 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -154,7 +154,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -In this case, the function couldn’t find the event chain matching the pattern, because the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. +In this case, the function couldn’t find the event chain matching the pattern, because the event for number 3 occurred between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. 
``` sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 0cfac4d59fe..bd39de14d47 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -9,9 +9,9 @@ A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). A The date value is stored without the time zone. -## Examples {#examples} +**Example** -**1.** Creating a table with a `DateTime`-type column and inserting data into it: +Creating a table with a `Date`-type column and inserting data into it: ``` sql CREATE TABLE dt ( @@ -23,10 +23,7 @@ ENGINE = TinyLog; ``` ``` sql -INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2); -``` - -``` sql +INSERT INTO dt VALUES (1546300800, 1), ('2019-01-01', 2); SELECT * FROM dt; ``` @@ -37,11 +34,8 @@ SELECT * FROM dt; └────────────┴──────────┘ ``` -## See Also {#see-also} +**See Also** - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) - [`DateTime` data type](../../sql-reference/data-types/datetime.md) - - -[Original article](https://clickhouse.tech/docs/en/data_types/date/) diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md new file mode 100644 index 00000000000..592f952e1be --- /dev/null +++ b/docs/en/sql-reference/data-types/date32.md @@ -0,0 +1,40 @@ +--- +toc_priority: 48 +toc_title: Date32 +--- + +# Date32 {#data_type-datetime32} + +A date. Supports the same date range as [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1925-01-01. Allows storing values up to 2283-11-11. + +**Examples** + +Creating a table with a `Date32`-type column and inserting data into it: + +``` sql +CREATE TABLE new +( + `timestamp` Date32, + `event_id` UInt8 +) +ENGINE = TinyLog; +``` + +``` sql +INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2); +SELECT * FROM new; +``` + +``` text +┌──timestamp─┬─event_id─┐ +│ 2100-01-01 │ 1 │ +│ 2100-01-01 │ 2 │ +└────────────┴──────────┘ +``` + +**See Also** + +- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) +- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) +- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) + diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 1d3725b9fb3..112461535f7 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -17,7 +17,7 @@ DateTime64(precision, [timezone]) Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md). -Supported range from January 1, 1925 till December 31, 2283. 
+Supported range from January 1, 1925 till November 11, 2283. ## Examples {#examples} diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 577fdd668a2..cf77444b17f 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2236,3 +2236,74 @@ defaultRoles() Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +## queryID {#query-id} + +Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`. + +In contrast to the [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see example). + +**Syntax** + +``` sql +queryID() +``` + +**Returned value** + +- The ID of the current query. + +Type: [String](../../sql-reference/data-types/string.md) + +**Example** + +Query: + +``` sql +CREATE TABLE tmp (str String) ENGINE = Log; +INSERT INTO tmp (*) VALUES ('a'); +SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); +``` + +Result: + +``` text +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +## initialQueryID {#initial-query-id} + +Returns the ID of the initial query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`. + +In contrast to the [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example). + +**Syntax** + +``` sql +initialQueryID() +``` + +**Returned value** + +- The ID of the initial query. + +Type: [String](../../sql-reference/data-types/string.md) + +**Example** + +Query: + +``` sql +CREATE TABLE tmp (str String) ENGINE = Log; +INSERT INTO tmp (*) VALUES ('a'); +SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); +``` + +Result: + +``` text +┌─count()─┐
│ 1 │
└─────────┘
``` diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ad6edaea312..05f39e2d4e6 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -152,6 +152,104 @@ Alias: `DATE`. ## toDateTimeOrNull {#todatetimeornull} +## toDate32 {#todate32} + +Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range, the border value supported by `Date32` is returned. If the argument has [Date](../../sql-reference/data-types/date.md) type, the borders of `Date` are taken into account. + +**Syntax** + +``` sql +toDate32(expr) +``` + +**Arguments** + +- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md). + +**Returned value** + +- A calendar date. + +Type: [Date32](../../sql-reference/data-types/date32.md). + +**Example** + +1. The value is within the range: + +``` sql +SELECT toDate32('1955-01-01') AS value, toTypeName(value); +``` + +``` text +┌──────value─┬─toTypeName(toDate32('1955-01-01'))─┐ +│ 1955-01-01 │ Date32 │ +└────────────┴────────────────────────────────────┘ +``` + +2. 
The value is outside the range: + +``` sql +SELECT toDate32('1924-01-01') AS value, toTypeName(value); +``` + +``` text +┌──────value─┬─toTypeName(toDate32('1924-01-01'))─┐ +│ 1925-01-01 │ Date32 │ +└────────────┴────────────────────────────────────┘ +``` + +3. With `Date`-type argument: + +``` sql +SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value); +``` + +``` text +┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐ +│ 1970-01-01 │ Date32 │ +└────────────┴────────────────────────────────────────────┘ +``` + +## toDate32OrZero {#todate32-or-zero} + +The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if an invalid argument is received. + +**Example** + +Query: + +``` sql +SELECT toDate32OrZero('1924-01-01'), toDate32OrZero(''); +``` + +Result: + +``` text +┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐ +│ 1925-01-01 │ 1925-01-01 │ +└──────────────────────────────┴────────────────────┘ +``` + +## toDate32OrNull {#todate32-or-null} + +The same as [toDate32](#todate32) but returns `NULL` if an invalid argument is received. + +**Example** + +Query: + +``` sql +SELECT toDate32OrNull('1955-01-01'), toDate32OrNull(''); +``` + +Result: + +``` text +┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐ +│ 1955-01-01 │ ᴺᵁᴸᴸ │ +└──────────────────────────────┴────────────────────┘ +``` + ## toDecimal(32\|64\|128\|256) {#todecimal3264128256} Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 07a13fc23c4..429241ebf13 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -5,7 +5,7 @@ toc_title: PROJECTION # Manipulating Projections {#manipulations-with-projections} -The following operations are available: +The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available: - `ALTER TABLE [db].name ADD PROJECTION name AS SELECT [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata. @@ -15,7 +15,7 @@ The following operations are available: - `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. -The commands ADD, DROP and CLEAR are lightweight in a sense that they only change metadata or remove files. +The commands `ADD`, `DROP` and `CLEAR` are lightweight in the sense that they only change metadata or remove files. Also, they are replicated, syncing projections metadata via ZooKeeper. 
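To make the lifecycle of these commands concrete, a session using them might look like the sketch below. The table, column, and projection names are invented for the example; note that the server expects the projection's `SELECT` wrapped in parentheses:

```sql
-- Add the projection description to the table metadata (lightweight).
ALTER TABLE visits ADD PROJECTION user_agg
(
    SELECT user_id, sum(duration)
    GROUP BY user_id
);

-- Materialize the projection in the parts of an existing partition (a mutation).
ALTER TABLE visits MATERIALIZE PROJECTION user_agg IN PARTITION 202108;

-- Remove the projection description again.
ALTER TABLE visits DROP PROJECTION user_agg;
```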
diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index 33a533d7f75..72b039261fa 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -4,40 +4,95 @@ toc_title: "Используемые сторонние библиотеки" --- -# Используемые сторонние библиотеки {#ispolzuemye-storonnie-biblioteki} +# Используемые сторонние библиотеки {#third-party-libraries-used} -| Библиотека | Лицензия | -|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------| -| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | -| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | -| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | -| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) | -| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) | -| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | -| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | -| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | -| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) | -| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | -| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | -| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | -| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) | -| libmetrohash | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | -| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) | -| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) | -| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) | -| libwidechar_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) | -| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) | -| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) | -| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) | -| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | -| pdqsort | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/pdqsort/license.txt) | -| poco | [Boost Software License - Version 
1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) | -| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) | -| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) | -| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) | -| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) | -| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) | +Список сторонних библиотек: + +| Библиотека | Тип лицензии | +|:-|:-| +| abseil-cpp | [Apache](https://github.com/ClickHouse-Extras/abseil-cpp/blob/4f3b686f86c3ebaba7e4e926e62a79cb1c659a54/LICENSE) | +| AMQP-CPP | [Apache](https://github.com/ClickHouse-Extras/AMQP-CPP/blob/1a6c51f4ac51ac56610fa95081bd2f349911375a/LICENSE) | +| arrow | [Apache](https://github.com/ClickHouse-Extras/arrow/blob/078e21bad344747b7656ef2d7a4f7410a0a303eb/LICENSE.txt) | +| avro | [Apache](https://github.com/ClickHouse-Extras/avro/blob/e43c46e87fd32eafdc09471e95344555454c5ef8/LICENSE.txt) | +| aws | [Apache](https://github.com/ClickHouse-Extras/aws-sdk-cpp/blob/7d48b2c8193679cc4516e5bd68ae4a64b94dae7d/LICENSE.txt) | +| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) | +| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) | +| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) | +| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) | +| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) | +| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) | +| brotli | [MIT](https://github.com/google/brotli/blob/63be8a99401992075c23e99f7c84de1c653e39e2/LICENSE) | +| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/a00ccd91b3746ef2ab51d40fe3265829949d1ace/LICENSE) | +| cassandra | [Apache](https://github.com/ClickHouse-Extras/cpp-driver/blob/eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1/LICENSE.txt) | +| cctz | [Apache](https://github.com/ClickHouse-Extras/cctz/blob/c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8/LICENSE.txt) | +| cityhash102 | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/cityhash102/COPYING) | +| cppkafka | [BSD 2-clause](https://github.com/mfontanini/cppkafka/blob/5a119f689f8a4d90d10a9635e7ee2bee5c127de1/LICENSE) | +| croaring | [Apache](https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/LICENSE) | +| curl | [Apache](https://github.com/curl/curl/blob/3b8bbbbd1609c638a3d3d0acb148a33dedb67be3/docs/LICENSE-MIXING.md) | +| cyrus-sasl | [BSD 2-clause](https://github.com/ClickHouse-Extras/cyrus-sasl/blob/e6466edfd638cc5073debe941c53345b18a09512/COPYING) | +| double-conversion | [BSD 3-clause](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | +| dragonbox | [Apache](https://github.com/ClickHouse-Extras/dragonbox/blob/923705af6fd953aa948fc175f6020b15f7359838/LICENSE-Apache2-LLVM) | +| 
fast_float | [Apache](https://github.com/fastfloat/fast_float/blob/7eae925b51fd0f570ccd5c880c12e3e27a23b86f/LICENSE) | +| fastops | [MIT](https://github.com/ClickHouse-Extras/fastops/blob/88752a5e03cf34639a4a37a4b41d8b463fffd2b5/LICENSE) | +| flatbuffers | [Apache](https://github.com/ClickHouse-Extras/flatbuffers/blob/eb3f827948241ce0e701516f16cd67324802bce9/LICENSE.txt) | +| fmtlib | [Unknown](https://github.com/fmtlib/fmt/blob/c108ee1d590089ccf642fc85652b845924067af2/LICENSE.rst) | +| gcem | [Apache](https://github.com/kthohr/gcem/blob/8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00/LICENSE) | +| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) | +| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | +| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | +| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) | +| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) | +| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) | +| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | +| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) | +| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) | +| libcpuid | [BSD 2-clause](https://github.com/ClickHouse-Extras/libcpuid/blob/8db3b8d2d32d22437f063ce692a1b9bb15e42d18/COPYING) | +| libcxx | [Apache](https://github.com/ClickHouse-Extras/libcxx/blob/2fa892f69acbaa40f8a18c6484854a6183a34482/LICENSE.TXT) | +| libcxxabi | [Apache](https://github.com/ClickHouse-Extras/libcxxabi/blob/df8f1e727dbc9e2bedf2282096fa189dc3fe0076/LICENSE.TXT) | +| libdivide | [zLib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | +| libfarmhash | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libfarmhash/COPYING) | +| libgsasl | [LGPL](https://github.com/ClickHouse-Extras/libgsasl/blob/383ee28e82f69fa16ed43b48bd9c8ee5b313ab84/LICENSE) | +| libhdfs3 | [Apache](https://github.com/ClickHouse-Extras/libhdfs3/blob/095b9d48b400abb72d967cb0539af13b1e3d90cf/LICENSE.txt) | +| libmetrohash | [Apache](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | +| libpq | [Unknown](https://github.com/ClickHouse-Extras/libpq/blob/e071ea570f8985aa00e34f5b9d50a3cfe666327e/COPYRIGHT) | +| libpqxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/libpqxx/blob/357608d11b7a1961c3fb7db2ef9a5dbb2e87da77/COPYING) | +| librdkafka | [MIT](https://github.com/ClickHouse-Extras/librdkafka/blob/b8554f1682062c85ba519eb54ef2f90e02b812cb/LICENSE.murmur2) | +| libunwind | [Apache](https://github.com/ClickHouse-Extras/libunwind/blob/6b816d2fba3991f8fd6aaec17d92f68947eab667/LICENSE.TXT) | +| libuv | [BSD](https://github.com/ClickHouse-Extras/libuv/blob/e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab/LICENSE) | +| llvm | [Apache](https://github.com/ClickHouse-Extras/llvm/blob/e5751459412bce1391fb7a2e9bbc01e131bf72f1/llvm/LICENSE.TXT) | +| lz4 | 
[BSD](https://github.com/lz4/lz4/blob/f39b79fb02962a1cd880bbdecb6dffba4f754a11/LICENSE) | +| mariadb-connector-c | [LGPL](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/5f4034a3a6376416504f17186c55fe401c6d8e5e/COPYING.LIB) | +| miniselect | [Boost](https://github.com/danlark1/miniselect/blob/be0af6bd0b6eb044d1acc4f754b229972d99903a/LICENSE_1_0.txt) | +| msgpack-c | [Boost](https://github.com/msgpack/msgpack-c/blob/46684265d50b5d1b062d4c5c428ba08462844b1d/LICENSE_1_0.txt) | +| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | +| NuRaft | [Apache](https://github.com/ClickHouse-Extras/NuRaft/blob/7ecb16844af6a9c283ad432d85ecc2e7d1544676/LICENSE) | +| openldap | [Unknown](https://github.com/ClickHouse-Extras/openldap/blob/0208811b6043ca06fda8631a5e473df1ec515ccb/LICENSE) | +| orc | [Apache](https://github.com/ClickHouse-Extras/orc/blob/0a936f6bbdb9303308973073f8623b5a8d82eae1/LICENSE) | +| poco | [Boost](https://github.com/ClickHouse-Extras/poco/blob/7351c4691b5d401f59e3959adfc5b4fa263b32da/LICENSE) | +| protobuf | [BSD 3-clause](https://github.com/ClickHouse-Extras/protobuf/blob/75601841d172c73ae6bf4ce8121f42b875cdbabd/LICENSE) | +| rapidjson | [MIT](https://github.com/ClickHouse-Extras/rapidjson/blob/c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa/bin/jsonschema/LICENSE) | +| re2 | [BSD 3-clause](https://github.com/google/re2/blob/13ebb377c6ad763ca61d12dd6f88b1126bd0b911/LICENSE) | +| replxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/replxx/blob/c81be6c68b146f15f2096b7ef80e3f21fe27004c/LICENSE.md) | +| rocksdb | [BSD 3-clause](https://github.com/ClickHouse-Extras/rocksdb/blob/b6480c69bf3ab6e298e0d019a07fd4f69029b26a/LICENSE.leveldb) | +| s2geometry | [Apache](https://github.com/ClickHouse-Extras/s2geometry/blob/20ea540d81f4575a3fc0aea585aac611bcd03ede/LICENSE) | +| sentry-native | [MIT](https://github.com/ClickHouse-Extras/sentry-native/blob/94644e92f0a3ff14bd35ed902a8622a2d15f7be4/LICENSE) | +| simdjson | [Apache](https://github.com/simdjson/simdjson/blob/8df32cea3359cb30120795da6020b3b73da01d38/LICENSE) | +| snappy | [Public Domain](https://github.com/google/snappy/blob/3f194acb57e0487531c96b97af61dcbd025a78a3/COPYING) | +| sparsehash-c11 | [BSD 3-clause](https://github.com/sparsehash/sparsehash-c11/blob/cf0bffaa456f23bc4174462a789b90f8b6f5f42f/LICENSE) | +| stats | [Apache](https://github.com/kthohr/stats/blob/b6dd459c10a88c7ea04693c007e9e35820c5d9ad/LICENSE) | +| thrift | [Apache](https://github.com/apache/thrift/blob/010ccf0a0c7023fea0f6bf4e4078ebdff7e61982/LICENSE) | +| unixodbc | [LGPL](https://github.com/ClickHouse-Extras/UnixODBC/blob/b0ad30f7f6289c12b76f04bfb9d466374bb32168/COPYING) | +| xz | [Public Domain](https://github.com/xz-mirror/xz/blob/869b9d1b4edd6df07f819d360d306251f8147353/COPYING) | +| zlib-ng | [zLib](https://github.com/ClickHouse-Extras/zlib-ng/blob/6a5e93b9007782115f7f7e5235dedc81c4f1facb/LICENSE.md) | +| zstd | [BSD](https://github.com/facebook/zstd/blob/a488ba114ec17ea1054b9057c26a046fc122b3b6/LICENSE) | + +Список всех сторонних библиотек можно получить с помощью запроса: + +``` sql +SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; +``` + +[Пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений 
{#adding-third-party-libraries}

diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 7e517be2d66..e8152441101 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -377,23 +377,33 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY
 - `s != 1`
 - `NOT startsWith(s, 'test')`
-### Проекции {#projections}
-Проекции похожи на материализованные представления, но определяются на уровне партов. Это обеспечивает гарантии согласованности наряду с автоматическим использованием в запросах.
+## Проекции {#projections}
+Проекции похожи на [материализованные представления](../../../sql-reference/statements/create/view.md#materialized), но определяются на уровне кусков данных. Это обеспечивает гарантии согласованности данных наряду с автоматическим использованием в запросах.
-#### Запрос {#projection-query}
-Запрос проекции — это то, что определяет проекцию. Он имеет следующую грамматику:
+Проекции — это экспериментальная возможность. Чтобы включить поддержку проекций, установите настройку [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection).
-`SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]`
+Проекции не поддерживаются для запросов `SELECT` с модификатором [FINAL](../../../sql-reference/statements/select/from.md#select-from-final).
-Он неявно выбирает данные из родительской таблицы.
+### Запрос проекции {#projection-query}
+Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы.
+**Синтаксис**
-#### Хранение {#projection-storage}
-Проекции хранятся в каталоге парта. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный парт таблицы MergeTree. Таблица создается запросом определения проекции. Если есть конструкция GROUP BY, то базовый механизм хранения становится AggregatedMergeTree, а все агрегатные функции преобразуются в AggregateFunction. Если есть конструкция ORDER BY, таблица MergeTree будет использовать его в качестве выражения первичного ключа. Во время процесса слияния парт проекции будет слит с помощью процедуры слияния ее хранилища. Контрольная сумма парта родительской таблицы будет включать парт проекции. Другие процедуры аналогичны индексам пропуска данных.
+```sql
+SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]
+```
-#### Анализ запросов {#projection-query-analysis}
-1. Проверить, можно ли использовать проекцию в данном запросе, то есть, что с ней выходит тот же результат, что и с запросом к базовой таблице.
-2. Выбрать наиболее подходящее совпадение, содержащее наименьшее количество гранул для чтения.
-3. План запроса, который использует проекции, будет отличаться от того, который использует исходные парты. При отсутствии проекции в некоторых партах можно расширить план, чтобы «проецировать» на лету.
+Проекции можно изменить или удалить с помощью запроса [ALTER](../../../sql-reference/statements/alter/projection.md).
+
+### Хранение проекции {#projection-storage}
+Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции.
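Для наглядности — примерный набросок определения проекции с агрегацией (имена таблицы и столбцов условные, точный синтаксис описан в документации [ALTER](../../../sql-reference/statements/alter/projection.md)):

``` sql
ALTER TABLE visits ADD PROJECTION p_agg
(
    SELECT UserID, sum(Duration)
    GROUP BY UserID
);
```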
+Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`. +Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа. +Во время процесса слияния кусок данных проекции объединяется с помощью процедуры слияния хранилища. Контрольная сумма куска данных родительской таблицы включает кусок данных проекции. Другие процедуры аналогичны индексам пропуска данных. + +### Анализ запросов {#projection-query-analysis} +1. Проверьте, можно ли использовать проекцию в данном запросе, то есть, что с ней получается тот же результат, что и с запросом к базовой таблице. +2. Выберите наиболее подходящее совпадение, содержащее наименьшее количество гранул для чтения. +3. План запроса, который использует проекции, отличается от того, который использует исходные куски данных. Если в некоторых кусках проекции отсутствуют, можно расширить план, чтобы «проецировать» на лету. ## Конкурентный доступ к данным {#concurrent-data-access} diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 277b73a6d36..bbb66b70371 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -26,7 +26,7 @@ Connected to ClickHouse server version 20.13.1 revision 54442. Клиент может быть использован в интерактивном и не интерактивном (batch) режиме. Чтобы использовать batch режим, укажите параметр query, или отправьте данные в stdin (проверяется, что stdin - не терминал), или и то, и другое. -Аналогично HTTP интерфейсу, при использовании одновременно параметра query и отправке данных в stdin, запрос составляется из конкатенации параметра query, перевода строки, и данных в stdin. Это удобно для больших INSERT запросов. +Аналогично HTTP интерфейсу, при использовании одновременно параметра query и отправке данных в stdin, запрос составляется из конкатенации параметра query, перевода строки и данных в stdin. Это удобно для больших `INSERT` запросов. Примеры использования клиента для вставки данных: @@ -41,17 +41,17 @@ _EOF $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; ``` -В batch режиме в качестве формата данных по умолчанию используется формат TabSeparated. Формат может быть указан в секции FORMAT запроса. +В batch режиме в качестве формата данных по умолчанию используется формат `TabSeparated`. Формат может быть указан в запросе в секции `FORMAT`. -По умолчанию, в batch режиме вы можете выполнить только один запрос. Чтобы выполнить несколько запросов из «скрипта», используйте параметр –multiquery. Это работает для всех запросов кроме INSERT. Результаты запросов выводятся подряд без дополнительных разделителей. -Также, при необходимости выполнить много запросов, вы можете запускать clickhouse-client на каждый запрос. Заметим, что запуск программы clickhouse-client может занимать десятки миллисекунд. +По умолчанию в batch режиме вы можете выполнить только один запрос. Чтобы выполнить несколько запросов из «скрипта», используйте параметр `–-multiquery`. Это работает для всех запросов кроме `INSERT`. Результаты запросов выводятся подряд без дополнительных разделителей. +Если нужно выполнить много запросов, вы можете запускать clickhouse-client отдельно на каждый запрос. Заметим, что запуск программы clickhouse-client может занимать десятки миллисекунд. -В интерактивном режиме, вы получите командную строку, в которую можно вводить запросы. 
+В интерактивном режиме вы получаете командную строку, в которую можно вводить запросы.

 Если не указано multiline (по умолчанию):
-Чтобы выполнить запрос, нажмите Enter. Точка с запятой на конце запроса не обязательна. Чтобы ввести запрос, состоящий из нескольких строк, перед переводом строки, введите символ обратного слеша: `\` - тогда после нажатия Enter, вам предложат ввести следующую строку запроса.
+Чтобы выполнить запрос, нажмите Enter. Точка с запятой на конце запроса необязательна. Чтобы ввести запрос, состоящий из нескольких строк, в конце строки поставьте символ обратного слеша `\`, тогда после нажатия Enter вы сможете ввести следующую строку запроса.
-Если указано multiline (многострочный режим):
+Если указан параметр `--multiline` (многострочный режим):
 Чтобы выполнить запрос, завершите его точкой с запятой и нажмите Enter. Если в конце введённой строки не было точки с запятой, то вам предложат ввести следующую строчку запроса.

 Исполняется только один запрос, поэтому всё, что введено после точки с запятой, игнорируется.

@@ -61,20 +61,20 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
 Командная строка сделана на основе readline (и history) (или libedit, или без какой-либо библиотеки, в зависимости от сборки) - то есть, в ней работают привычные сочетания клавиш, а также присутствует история. История пишется в `~/.clickhouse-client-history`.
-По умолчанию, в качестве формата, используется формат PrettyCompact (красивые таблички). Вы можете изменить формат с помощью секции FORMAT запроса, или с помощью указания `\G` на конце запроса, с помощью аргумента командной строки `--format` или `--vertical`, или с помощью конфигурационного файла клиента.
+По умолчанию используется формат вывода `PrettyCompact` (он поддерживает красивый вывод таблиц). Вы можете изменить формат вывода результатов запроса следующими способами: с помощью секции `FORMAT` в запросе, указав символ `\G` в конце запроса, используя аргументы командной строки `--format` или `--vertical` или с помощью конфигурационного файла клиента.
-Чтобы выйти из клиента, нажмите Ctrl+D, или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй»
+Чтобы выйти из клиента, нажмите Ctrl+D или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй».
-При выполнении запроса, клиент показывает:
+При выполнении запроса клиент показывает:
-1. Прогресс выполнение запроса, который обновляется не чаще, чем 10 раз в секунду (по умолчанию). При быстрых запросах, прогресс может не успеть отобразиться.
+1. Прогресс выполнения запроса, который обновляется не чаще, чем 10 раз в секунду (по умолчанию). При быстрых запросах прогресс может не успеть отобразиться.
 2. Отформатированный запрос после его парсинга - для отладки.
 3. Результат в заданном формате.
 4. Количество строк результата, прошедшее время, а также среднюю скорость выполнения запроса.
-Вы можете прервать длинный запрос, нажав Ctrl+C. При этом вам всё равно придётся чуть-чуть подождать, пока сервер остановит запрос. На некоторых стадиях выполнения, запрос невозможно прервать. Если вы не дождётесь и нажмёте Ctrl+C второй раз, то клиент будет завершён.
+Вы можете прервать длинный запрос, нажав Ctrl+C. При этом вам всё равно придётся чуть-чуть подождать, пока сервер остановит запрос.
На некоторых стадиях выполнения запрос невозможно прервать. Если вы не дождётесь и нажмёте Ctrl+C второй раз, то клиент будет завершён.
-Клиент командной строки позволяет передать внешние данные (внешние временные таблицы) для использования запроса. Подробнее смотрите раздел «Внешние данные для обработки запроса»
+Клиент командной строки позволяет передать внешние данные (внешние временные таблицы) для выполнения запроса. Подробнее смотрите раздел «Внешние данные для обработки запроса».

 ### Запросы с параметрами {#cli-queries-with-parameters}

@@ -84,7 +84,7 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
 clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}"
 ```
-#### Cинтаксис запроса {#cli-queries-with-parameters-syntax}
+#### Синтаксис запроса {#cli-queries-with-parameters-syntax}

 Отформатируйте запрос обычным способом. Представьте значения, которые вы хотите передать из параметров приложения в запрос в следующем формате:

@@ -155,3 +155,29 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 ```
+### Формат ID запроса {#query-id-format}
+
+В интерактивном режиме `clickhouse-client` показывает ID для каждого запроса. По умолчанию ID выводится в таком виде:
+
+```sql
+Query id: 927f137d-00f1-4175-8914-0dd066365e96
+```
+
+Произвольный формат ID можно задать в конфигурационном файле внутри тега `query_id_formats`. ID подставляется вместо `{query_id}` в строке формата. В теге может быть перечислено несколько строк формата.
+Эта возможность может быть полезна для генерации URL, с помощью которых выполняется профилирование запросов.
+
+**Пример**
+
+```xml
+<config>
+    <query_id_formats>
+        <speedscope>http://speedscope-host/#profileURL=qp%3Fid%3D{query_id}</speedscope>
+    </query_id_formats>
+</config>
+```
+
+Если применить приведённую выше конфигурацию, то ID запроса будет выводиться в следующем виде:
+
+``` text
+speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d
+```

diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md
index 8b4b0da8f2b..2d39aa20375 100644
--- a/docs/ru/operations/configuration-files.md
+++ b/docs/ru/operations/configuration-files.md
@@ -6,19 +6,51 @@ toc_title: "Конфигурационные файлы"

 # Конфигурационные файлы {#configuration_files}
-Основной конфигурационный файл сервера - `config.xml` или `config.yaml`. Он расположен в директории `/etc/clickhouse-server/`.
+ClickHouse поддерживает многофайловое управление конфигурацией. Основной конфигурационный файл сервера — `/etc/clickhouse-server/config.xml` или `/etc/clickhouse-server/config.yaml`. Остальные файлы должны находиться в директории `/etc/clickhouse-server/config.d`. Обратите внимание, что конфигурационные файлы могут быть записаны в форматах XML или YAML, но смешение этих форматов в одном файле не поддерживается. Например, можно хранить основные конфигурационные файлы как `config.xml` и `users.xml`, а дополнительные файлы записать в директории `config.d` и `users.d` в формате `.yaml`.
-Отдельные настройки могут быть переопределены в файлах `*.xml` и `*.conf`, а также `.yaml` (для файлов в формате YAML) из директории `config.d` рядом с конфигом.
+Все XML файлы должны иметь одинаковый корневой элемент, обычно `<yandex>`. Для YAML элемент `yandex:` должен отсутствовать, так как парсер вставляет его автоматически.
-У элементов этих конфигурационных файлов могут быть указаны атрибуты `replace` или `remove`.
+## Переопределение {#override}
-Если ни один не указан - объединить содержимое элементов рекурсивно с заменой значений совпадающих детей.
+
+Некоторые настройки, определенные в основном конфигурационном файле, могут быть переопределены в других файлах:
-Если указано `replace` - заменить весь элемент на указанный.
+
+- У элементов этих конфигурационных файлов могут быть указаны атрибуты `replace` или `remove`.
+- Если ни один атрибут не указан, сервер объединит содержимое элементов рекурсивно, заменяя совпадающие значения дочерних элементов.
+- Если указан атрибут `replace`, сервер заменит весь элемент на указанный.
+- Если указан атрибут `remove`, сервер удалит элемент.
-Если указано `remove` - удалить элемент.
+
+Также возможно указать атрибуты как переменные среды с помощью `from_env="VARIABLE_NAME"`:
-Также в конфиге могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from). Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl` отсутствует, то в лог попадает соответствующая запись. Чтобы ClickHouse не писал в лог об отсутствии подстановки, необходимо указать атрибут `optional="true"` (например, настройка [macros](server-configuration-parameters/settings.md)).
+
+```xml
+<yandex>
+    <macros>
+        <replica from_env="REPLICA" />
+        <layer from_env="LAYER" />
+        <shard from_env="SHARD" />
+    </macros>
+</yandex>
+```
+
+## Подстановки {#substitution}
+
+В конфигурационном файле могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from). Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl`, отсутствует, то делается соответствующая запись в лог. Чтобы ClickHouse не фиксировал в логе отсутствие подстановки, необходимо указать атрибут `optional="true"` (например, настройки для [macros](server-configuration-parameters/settings.md#macros)).
+
+Если нужно заменить весь элемент подстановкой, можно использовать `include` как имя элемента.
+
+Пример подстановки XML:
+
+```xml
+<yandex>
+    <!-- Поддерево XML из ZooKeeper по пути `/profiles-in-zookeeper` добавляется к элементу `<profiles>`. -->
+    <profiles from_zk="/profiles-in-zookeeper" />
+
+    <users>
+        <!-- Элемент `include` заменяется поддеревом из ZooKeeper по пути `/users-in-zookeeper`. -->
+        <include from_zk="/users-in-zookeeper" />
+        <include from_zk="/other-users-in-zookeeper" />
+    </users>
+</yandex>
+```

 Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.

@@ -115,3 +147,9 @@ seq: 123 abc
 ```
+
+## Детали реализации {#implementation-details}
+
+При старте сервера для каждого конфигурационного файла создаются файлы предобработки `file-preprocessed.xml`. Они содержат все выполненные подстановки и переопределения (эти сведения записываются просто для информации). Если в конфигурационном файле настроены подстановки ZooKeeper, но при старте сервера ZooKeeper недоступен, то сервер загружает конфигурацию из соответствующего файла предобработки.
+
+Сервер отслеживает как изменения в конфигурационных файлах, так и файлы и узлы ZooKeeper, которые были использованы при выполнении подстановок и переопределений, и на ходу перезагружает настройки для пользователей и кластеров. Это означает, что можно изменять кластеры, пользователей и их настройки без перезапуска сервера.

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 31cd3a15c86..aac9c30658c 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -3252,3 +3252,25 @@ SETTINGS index_granularity = 8192 │
 - 1 — таблица обновляется автоматически в фоновом режиме при обнаружении изменений схемы.

 Значение по умолчанию: `0`.
+
+## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
+
+Включает или отключает поддержку [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) при обработке запросов `SELECT`.
+
+Возможные значения:
+
+- 0 — Проекции не поддерживаются.
+- 1 — Проекции поддерживаются.
+
+Значение по умолчанию: `0`.
+
+## force_optimize_projection {#force-optimize-projection}
+
+Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)).
+
+Возможные значения:
+
+- 0 — Проекции используются опционально.
+- 1 — Проекции обязательно используются.
+
+Значение по умолчанию: `0`.
\ No newline at end of file
diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md
index 50508de96a3..17b4ec99d9a 100644
--- a/docs/ru/sql-reference/data-types/date.md
+++ b/docs/ru/sql-reference/data-types/date.md
@@ -9,9 +9,9 @@ toc_title: Date

 Дата хранится без учёта часового пояса.
-## Примеры {#examples}
+**Пример**
-**1.** Создание таблицы и добавление в неё данных:
+Создание таблицы и добавление в неё данных:

 ``` sql
 CREATE TABLE dt
 (
     `timestamp` Date,
     `event_id` UInt8
 )
 ENGINE = TinyLog;
 ```

 ``` sql
 INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
-```
-
-``` sql
 SELECT * FROM dt;
 ```

@@ -37,7 +34,7 @@ SELECT * FROM dt;
 └────────────┴──────────┘
 ```
-## Смотрите также {#see-also}
+**См. также**

 - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
 - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)

diff --git a/docs/ru/sql-reference/data-types/date32.md b/docs/ru/sql-reference/data-types/date32.md
new file mode 100644
index 00000000000..a335eba8e80
--- /dev/null
+++ b/docs/ru/sql-reference/data-types/date32.md
@@ -0,0 +1,40 @@
+---
+toc_priority: 48
+toc_title: Date32
+---
+
+# Date32 {#data_type-datetime32}
+
+Дата. Поддерживается такой же диапазон дат, как для типа [Datetime64](../../sql-reference/data-types/datetime64.md). Значение хранится в четырех байтах и соответствует числу дней с 1925-01-01 по 2283-11-11.
+
+**Пример**
+
+Создание таблицы со столбцом типа `Date32` и добавление в нее данных:
+
+``` sql
+CREATE TABLE new
+(
+    `timestamp` Date32,
+    `event_id` UInt8
+)
+ENGINE = TinyLog;
+```
+
+``` sql
+INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
+SELECT * FROM new;
+```
+
+``` text
+┌──timestamp─┬─event_id─┐
+│ 2100-01-01 │ 1 │
+│ 2100-01-01 │ 2 │
+└────────────┴──────────┘
+```
+
+**См. 
также** + +- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) +- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) +- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) + diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index 3a08da75bb7..73daada3af3 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -17,7 +17,7 @@ DateTime64(precision, [timezone]) Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (‘2020-01-01 05:00:01.000’). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md). -Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2283 г. +Поддерживаются значения от 1 января 1925 г. и до 11 ноября 2283 г. ## Примеры {#examples} diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index c023786b788..5aae0eee9f9 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -2185,3 +2185,75 @@ defaultRoles() - Список ролей по умолчанию. Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +## queryID {#query-id} + +Возвращает идентификатор текущего запроса. Другие параметры запроса могут быть извлечены из системной таблицы [system.query_log](../../operations/system-tables/query_log.md) через `query_id`. + +В отличие от [initialQueryID](#initial-query-id), функция `queryID` может возвращать различные значения для разных шардов (см. пример). + +**Синтаксис** + +``` sql +queryID() +``` + +**Возвращаемое значение** + +- Идентификатор текущего запроса. + +Тип: [String](../../sql-reference/data-types/string.md) + +**Пример** + +Запрос: + +``` sql +CREATE TABLE tmp (str String) ENGINE = Log; +INSERT INTO tmp (*) VALUES ('a'); +SELECT count(DISTINCT t) FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); +``` + +Результат: + +``` text +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +## initialQueryID {#initial-query-id} + +Возвращает идентификатор родительского запроса. Другие параметры запроса могут быть извлечены из системной таблицы [system.query_log](../../operations/system-tables/query_log.md) через `initial_query_id`. + +В отличие от [queryID](#query-id), функция `initialQueryID` возвращает одинаковые значения для разных шардов (см. пример). + +**Синтаксис** + +``` sql +initialQueryID() +``` + +**Возвращаемое значение** + +- Идентификатор родительского запроса. 
+
+Тип: [String](../../sql-reference/data-types/string.md)
+
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE tmp (str String) ENGINE = Log;
+INSERT INTO tmp (*) VALUES ('a');
+SELECT count(DISTINCT t) FROM (SELECT initialQueryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID());
+```
+
+Результат:
+
+``` text
+┌─count()─┐
+│ 1 │
+└─────────┘
+```

diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index 757afca9588..16639386b67 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -152,6 +152,104 @@ Cиноним: `DATE`.

 ## toDateTimeOrNull {#todatetimeornull}
+## toDate32 {#todate32}
+
+Конвертирует аргумент в значение типа [Date32](../../sql-reference/data-types/date32.md). Если значение выходит за границы диапазона, возвращается пограничное значение `Date32`. Если аргумент имеет тип [Date](../../sql-reference/data-types/date.md), учитываются границы типа `Date`.
+
+**Синтаксис**
+
+``` sql
+toDate32(value)
+```
+
+**Аргументы**
+
+- `value` — Значение даты. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) или [Date](../../sql-reference/data-types/date.md).
+
+**Возвращаемое значение**
+
+- Календарная дата.
+
+Тип: [Date32](../../sql-reference/data-types/date32.md).
+
+**Пример**
+
+1. Значение находится в границах диапазона:
+
+``` sql
+SELECT toDate32('1955-01-01') AS value, toTypeName(value);
+```
+
+``` text
+┌──────value─┬─toTypeName(toDate32('1955-01-01'))─┐
+│ 1955-01-01 │ Date32 │
+└────────────┴────────────────────────────────────┘
+```
+
+2. Значение выходит за границы диапазона:
+
+``` sql
+SELECT toDate32('1924-01-01') AS value, toTypeName(value);
+```
+
+``` text
+┌──────value─┬─toTypeName(toDate32('1924-01-01'))─┐
+│ 1925-01-01 │ Date32 │
+└────────────┴────────────────────────────────────┘
+```
+
+3. С аргументом типа `Date`:
+
+``` sql
+SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value);
+```
+
+``` text
+┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐
+│ 1970-01-01 │ Date32 │
+└────────────┴────────────────────────────────────────────┘
+```
+
+## toDate32OrZero {#todate32-or-zero}
+
+То же самое, что и [toDate32](#todate32), но возвращает минимальное значение типа [Date32](../../sql-reference/data-types/date32.md), если получен недопустимый аргумент.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT toDate32OrZero('1924-01-01'), toDate32OrZero('');
+```
+
+Результат:
+
+``` text
+┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐
+│ 1925-01-01 │ 1925-01-01 │
+└──────────────────────────────┴────────────────────┘
+```
+
+## toDate32OrNull {#todate32-or-null}
+
+То же самое, что и [toDate32](#todate32), но возвращает `NULL`, если получен недопустимый аргумент.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT toDate32OrNull('1955-01-01'), toDate32OrNull('');
+```
+
+Результат:
+
+``` text
+┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐
+│ 1955-01-01 │ ᴺᵁᴸᴸ │
+└──────────────────────────────┴────────────────────┘
+```

 ## toDecimal(32\|64\|128\|256) {#todecimal3264128}

 Преобразует `value` к типу данных [Decimal](../../sql-reference/functions/type-conversion-functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков.
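Например, иллюстративный набросок вызова (результат показан для наглядности):

``` sql
SELECT toDecimal32(3.141, 2) AS x, toTypeName(x);
```

``` text
┌────x─┬─toTypeName(toDecimal32(3.141, 2))─┐
│ 3.14 │ Decimal(9, 2)                     │
└──────┴───────────────────────────────────┘
```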
diff --git a/docs/ru/sql-reference/statements/alter/projection.md b/docs/ru/sql-reference/statements/alter/projection.md
index db116963aa6..4b0d7f7865b 100644
--- a/docs/ru/sql-reference/statements/alter/projection.md
+++ b/docs/ru/sql-reference/statements/alter/projection.md
@@ -5,7 +5,7 @@ toc_title: PROJECTION

 # Манипуляции с проекциями {#manipulations-with-projections}
-Доступны следующие операции:
+Доступны следующие операции с [проекциями](../../../engines/table-engines/mergetree-family/mergetree.md#projections):

 - `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` — добавляет описание проекции в метаданные.

@@ -15,7 +15,7 @@ toc_title: PROJECTION

 - `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` — удаляет файлы проекции с диска без удаления описания.
-Комманды ADD, DROP и CLEAR — легковесны, поскольку они только меняют метаданные или удаляют файлы.
+Команды `ADD`, `DROP` и `CLEAR` — легковесны, поскольку они только меняют метаданные или удаляют файлы.

 Также команды реплицируются, синхронизируя описания проекций в метаданных с помощью ZooKeeper.

diff --git a/docs/zh/getting-started/example-datasets/star-schema.md b/docs/zh/getting-started/example-datasets/star-schema.md
index fcb6e90c694..563b7922238 100644
--- a/docs/zh/getting-started/example-datasets/star-schema.md
+++ b/docs/zh/getting-started/example-datasets/star-schema.md
@@ -7,7 +7,7 @@ toc_title: Star Schema Benchmark

 编译 dbgen:
-``` bash
+```bash
 $ git clone git@github.com:vadimtk/ssb-dbgen.git
 $ cd ssb-dbgen
 $ make
@@ -16,9 +16,9 @@ $ make
 开始生成数据:

 !!! warning "注意"
-    使用`-s 100`dbgen将生成6亿行数据(67GB), 如果使用`-s 1000`它会生成60亿行数据(这需要很多时间))
+    使用`-s 100`dbgen 将生成 6 亿行数据(67GB), 如果使用`-s 1000`它会生成 60 亿行数据(这需要很多时间)
-``` bash
+```bash
 $ ./dbgen -s 1000 -T c
 $ ./dbgen -s 1000 -T l
 $ ./dbgen -s 1000 -T p
 $ ./dbgen -s 1000 -T s
 $ ./dbgen -s 1000 -T d
 ```
-在ClickHouse中创建数据表:
+在 ClickHouse 中创建数据表:
-``` sql
+```sql
 CREATE TABLE customer
 (
 C_CUSTKEY UInt32,
@@ -93,7 +93,7 @@ ENGINE = MergeTree ORDER BY S_SUPPKEY;

 写入数据:
-``` bash
+```bash
 $ clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl
 $ clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl
 $ clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
@@ -102,100 +102,267 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl

 将`star schema`转换为`flat schema`:
-``` sql
-SET max_memory_usage = 20000000000, allow_experimental_multiple_joins_emulation = 1;
+```sql
+SET max_memory_usage = 20000000000;

 CREATE TABLE lineorder_flat
 ENGINE = MergeTree
 PARTITION BY toYear(LO_ORDERDATE)
 ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
-SELECT l.*, c.*, s.*, p.*
-FROM lineorder l
- ANY INNER JOIN customer c ON (c.C_CUSTKEY = l.LO_CUSTKEY)
- ANY INNER JOIN supplier s ON (s.S_SUPPKEY = l.LO_SUPPKEY)
- ANY INNER JOIN part p ON (p.P_PARTKEY = l.LO_PARTKEY);
-
-ALTER TABLE lineorder_flat DROP COLUMN C_CUSTKEY, DROP COLUMN S_SUPPKEY, DROP COLUMN P_PARTKEY;
+SELECT
+ l.LO_ORDERKEY AS LO_ORDERKEY,
+ l.LO_LINENUMBER AS LO_LINENUMBER,
+ l.LO_CUSTKEY AS LO_CUSTKEY,
+ l.LO_PARTKEY AS LO_PARTKEY,
+ l.LO_SUPPKEY AS LO_SUPPKEY,
+ l.LO_ORDERDATE AS LO_ORDERDATE,
+ l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
+ l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
+ l.LO_QUANTITY AS LO_QUANTITY,
+ l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
+ l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
+ l.LO_DISCOUNT AS LO_DISCOUNT,
+ l.LO_REVENUE AS LO_REVENUE,
+ l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
+ l.LO_TAX AS LO_TAX,
+ 
l.LO_COMMITDATE AS LO_COMMITDATE, + l.LO_SHIPMODE AS LO_SHIPMODE, + c.C_NAME AS C_NAME, + c.C_ADDRESS AS C_ADDRESS, + c.C_CITY AS C_CITY, + c.C_NATION AS C_NATION, + c.C_REGION AS C_REGION, + c.C_PHONE AS C_PHONE, + c.C_MKTSEGMENT AS C_MKTSEGMENT, + s.S_NAME AS S_NAME, + s.S_ADDRESS AS S_ADDRESS, + s.S_CITY AS S_CITY, + s.S_NATION AS S_NATION, + s.S_REGION AS S_REGION, + s.S_PHONE AS S_PHONE, + p.P_NAME AS P_NAME, + p.P_MFGR AS P_MFGR, + p.P_CATEGORY AS P_CATEGORY, + p.P_BRAND AS P_BRAND, + p.P_COLOR AS P_COLOR, + p.P_TYPE AS P_TYPE, + p.P_SIZE AS P_SIZE, + p.P_CONTAINER AS P_CONTAINER +FROM lineorder AS l +INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY +INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY +INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY; ``` 运行查询: Q1.1 -``` sql -SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25; +```sql +SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25; ``` Q1.2 -``` sql -SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35; +```sql +SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE toYYYYMM(LO_ORDERDATE) = 199401 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35; ``` Q1.3 -``` sql -SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toISOWeek(LO_ORDERDATE) = 6 AND toYear(LO_ORDERDATE) = 1994 AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35; +```sql +SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue +FROM lineorder_flat +WHERE toISOWeek(LO_ORDERDATE) = 6 AND toYear(LO_ORDERDATE) = 1994 + AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35; ``` Q2.1 -``` sql -SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND; +```sql +SELECT + sum(LO_REVENUE), + toYear(LO_ORDERDATE) AS year, + P_BRAND +FROM lineorder_flat +WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' +GROUP BY + year, + P_BRAND +ORDER BY + year, + P_BRAND; ``` Q2.2 -``` sql -SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND; +```sql +SELECT + sum(LO_REVENUE), + toYear(LO_ORDERDATE) AS year, + P_BRAND +FROM lineorder_flat +WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' +GROUP BY + year, + P_BRAND +ORDER BY + year, + P_BRAND; ``` Q2.3 -``` sql -SELECT sum(LO_REVENUE), toYear(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year, P_BRAND ORDER BY year, P_BRAND; +```sql +SELECT + sum(LO_REVENUE), + toYear(LO_ORDERDATE) AS year, + P_BRAND +FROM lineorder_flat +WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' +GROUP BY + year, + P_BRAND +ORDER BY + year, + P_BRAND; ``` Q3.1 -``` sql -SELECT C_NATION, S_NATION, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION, S_NATION, year ORDER BY year asc, revenue desc; +```sql +SELECT + C_NATION, + 
S_NATION, + toYear(LO_ORDERDATE) AS year, + sum(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 +GROUP BY + C_NATION, + S_NATION, + year +ORDER BY + year ASC, + revenue DESC; ``` Q3.2 -``` sql -SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc; +```sql +SELECT + C_CITY, + S_CITY, + toYear(LO_ORDERDATE) AS year, + sum(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 +GROUP BY + C_CITY, + S_CITY, + year +ORDER BY + year ASC, + revenue DESC; ``` Q3.3 -``` sql -SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc; +```sql +SELECT + C_CITY, + S_CITY, + toYear(LO_ORDERDATE) AS year, + sum(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 +GROUP BY + C_CITY, + S_CITY, + year +ORDER BY + year ASC, + revenue DESC; ``` Q3.4 -``` sql -SELECT C_CITY, S_CITY, toYear(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND toYYYYMM(LO_ORDERDATE) = '199712' GROUP BY C_CITY, S_CITY, year ORDER BY year asc, revenue desc; +```sql +SELECT + C_CITY, + S_CITY, + toYear(LO_ORDERDATE) AS year, + sum(LO_REVENUE) AS revenue +FROM lineorder_flat +WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND toYYYYMM(LO_ORDERDATE) = 199712 +GROUP BY + C_CITY, + S_CITY, + year +ORDER BY + year ASC, + revenue DESC; ``` Q4.1 -``` sql -SELECT toYear(LO_ORDERDATE) AS year, C_NATION, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, C_NATION ORDER BY year, C_NATION; +```sql +SELECT + toYear(LO_ORDERDATE) AS year, + C_NATION, + sum(LO_REVENUE - LO_SUPPLYCOST) AS profit +FROM lineorder_flat +WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') +GROUP BY + year, + C_NATION +ORDER BY + year ASC, + C_NATION ASC; ``` Q4.2 -``` sql -SELECT toYear(LO_ORDERDATE) AS year, S_NATION, P_CATEGORY, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year, S_NATION, P_CATEGORY ORDER BY year, S_NATION, P_CATEGORY; +```sql +SELECT + toYear(LO_ORDERDATE) AS year, + S_NATION, + P_CATEGORY, + sum(LO_REVENUE - LO_SUPPLYCOST) AS profit +FROM lineorder_flat +WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') +GROUP BY + year, + S_NATION, + P_CATEGORY +ORDER BY + year ASC, + S_NATION ASC, + P_CATEGORY ASC; ``` Q4.3 -``` sql -SELECT toYear(LO_ORDERDATE) AS year, S_CITY, P_BRAND, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM 
lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year, S_CITY, P_BRAND ORDER BY year, S_CITY, P_BRAND;
+```sql
+SELECT
+ toYear(LO_ORDERDATE) AS year,
+ S_CITY,
+ P_BRAND,
+ sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
+FROM lineorder_flat
+WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14'
+GROUP BY
+ year,
+ S_CITY,
+ P_BRAND
+ORDER BY
+ year ASC,
+ S_CITY ASC,
+ P_BRAND ASC;
 ```

[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/star_schema/)

diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md
index 4b1427cad2a..902064936f5 100644
--- a/docs/zh/getting-started/tutorial.md
+++ b/docs/zh/getting-started/tutorial.md
@@ -520,7 +520,7 @@ WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartU
 ClickHouse集群是一个同质集群。 设置步骤:
 1. 在群集的所有机器上安装ClickHouse服务端
-2. 在配置文件中设置群集配置
+2. 在配置文件中设置集群配置
 3. 在每个实例上创建本地表
 4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 4d01a523853..759feffb90e 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -300,9 +300,9 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
 if (config().has("keeper_server.storage_path"))
 path = config().getString("keeper_server.storage_path");
 else if (config().has("keeper_server.log_storage_path"))
- path = config().getString("keeper_server.log_storage_path");
+ path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path();
 else if (config().has("keeper_server.snapshot_storage_path"))
- path = config().getString("keeper_server.snapshot_storage_path");
+ path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path();
 else
 path = std::filesystem::path{KEEPER_DEFAULT_PATH};

@@ -359,7 +359,7 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
 auto servers = std::make_shared<std::vector<ProtocolServerAdapter>>();

 /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
- global_context->initializeKeeperStorageDispatcher();
+ global_context->initializeKeeperDispatcher();
 for (const auto & listen_host : listen_hosts)
 {
 /// TCP Keeper
@@ -428,7 +428,7 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
 else
 LOG_INFO(log, "Closed connections to Keeper.");
- global_context->shutdownKeeperStorageDispatcher();
+ global_context->shutdownKeeperDispatcher();

 /// Wait server pool to avoid use-after-free of destroyed context in the handlers
 server_pool.joinAll();

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index bdcb95119f0..2b1b6185321 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -270,6 +271,9 @@ try
 /// Load global settings from default_profile and system_profile.
 global_context->setDefaultProfiles(config());
+ /// We load temporary database first, because projections need it. 
+ DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
+
 /** Init dummy default DB
 * NOTE: We force using isolated default database to avoid conflicts with default database from server environment
 * Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory;
@@ -287,6 +291,12 @@ try
 /// Lock path directory before read
 status.emplace(path + "status", StatusFile::write_full_info);
+ fs::create_directories(fs::path(path) / "user_defined/");
+ LOG_DEBUG(log, "Loading user defined objects from {}", path);
+ Poco::File(path + "user_defined/").createDirectories();
+ UserDefinedObjectsLoader::instance().loadObjects(global_context);
+ LOG_DEBUG(log, "Loaded user defined objects.");
+
 LOG_DEBUG(log, "Loading metadata from {}", path);
 fs::create_directories(fs::path(path) / "data/");
 fs::create_directories(fs::path(path) / "metadata/");

diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp
index b4b514d1473..8a4387c2389 100644
--- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp
+++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp
@@ -5,40 +5,16 @@
 #include
 #include
 #include
-#include
-#include
-#include
 #include "getIdentifierQuote.h"
 #include
 #include
 #include
+#include

 namespace DB
 {
-namespace
-{
- using ValueType = ExternalResultDescription::ValueType;
-
- std::string getInsertQuery(const std::string & db_name, const std::string & table_name, const ColumnsWithTypeAndName & columns, IdentifierQuotingStyle quoting)
- {
- ASTInsertQuery query;
- query.table_id.database_name = db_name;
- query.table_id.table_name = table_name;
- query.columns = std::make_shared<ASTExpressionList>(',');
- query.children.push_back(query.columns);
- for (const auto & column : columns)
- query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
-
- WriteBufferFromOwnString buf;
- IAST::FormatSettings settings(buf, true);
- settings.always_quote_identifiers = true;
- settings.identifier_quoting_style = quoting;
- query.IAST::format(settings);
- return buf.str();
- }
-}

 ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::ConnectionHolderPtr connection_holder_,
 const std::string & remote_database_name_,

diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h
index 1b42119e490..16a1602d3cd 100644
--- a/programs/odbc-bridge/ODBCBlockOutputStream.h
+++ b/programs/odbc-bridge/ODBCBlockOutputStream.h
@@ -13,6 +13,7 @@ namespace DB

 class ODBCBlockOutputStream : public IBlockOutputStream
 {
+using ValueType = ExternalResultDescription::ValueType;

 public:
 ODBCBlockOutputStream(

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 8fe3bee90b7..ddbc4c4e433 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -774,6 +775,7 @@ if (ThreadFuzzer::instance().isEffective())
 {
 fs::create_directories(path / "data/");
 fs::create_directories(path / "metadata/");
+ fs::create_directories(path / "user_defined/");

 /// Directory with metadata of tables, which was marked as dropped by Atomic database
 fs::create_directories(path / "metadata_dropped/");
@@ -994,7 +996,7 @@ if (ThreadFuzzer::instance().isEffective())
 {
 #if USE_NURAFT
 /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. 
- global_context->initializeKeeperStorageDispatcher(); + global_context->initializeKeeperDispatcher(); for (const auto & listen_host : listen_hosts) { /// TCP Keeper @@ -1077,7 +1079,7 @@ if (ThreadFuzzer::instance().isEffective()) else LOG_INFO(log, "Closed connections to servers for tables."); - global_context->shutdownKeeperStorageDispatcher(); + global_context->shutdownKeeperDispatcher(); } /// Wait server pool to avoid use-after-free of destroyed context in the handlers @@ -1098,19 +1100,31 @@ if (ThreadFuzzer::instance().isEffective()) /// system logs may copy global context. global_context->setCurrentDatabaseNameInGlobalContext(default_database); + LOG_INFO(log, "Loading user defined objects from {}", path_str); + try + { + UserDefinedObjectsLoader::instance().loadObjects(global_context); + } + catch (...) + { + tryLogCurrentException(log, "Caught exception while loading user defined objects"); + throw; + } + LOG_DEBUG(log, "Loaded user defined objects"); + LOG_INFO(log, "Loading metadata from {}", path_str); try { + auto & database_catalog = DatabaseCatalog::instance(); + /// We load temporary database first, because projections need it. + database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); - auto & database_catalog = DatabaseCatalog::instance(); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); - /// We load temporary database first, because projections need it. - database_catalog.initializeAndLoadTemporaryDatabase(); /// Then, load remaining databases loadMetadata(global_context, default_database); database_catalog.loadDatabases(); diff --git a/release b/release index de549595d43..6e6970d7b00 100755 --- a/release +++ b/release @@ -60,9 +60,6 @@ then elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan" elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan" elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan" - elif [[ "$SANITIZER" == "libfuzzer" ]]; then - VERSION_POSTFIX+="+libfuzzer" - MALLOC_OPTS="-DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0" else echo "Unknown value of SANITIZER variable: $SANITIZER" exit 3 diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index 02d7e4982f9..b1b49a6ba75 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -87,6 +87,7 @@ enum class AccessType M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\ M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables; implicitly enabled by the grant CREATE_TABLE on any table */ \ + M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -94,6 +95,7 @@ enum class AccessType M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views; implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ + M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ 
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \

diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp
index 3d7b396a6f2..2881825dd17 100644
--- a/src/Access/tests/gtest_access_rights_ops.cpp
+++ b/src/Access/tests/gtest_access_rights_ops.cpp
@@ -45,7 +45,7 @@ TEST(AccessRights, Union)
 lhs.grant(AccessType::INSERT);
 rhs.grant(AccessType::ALL, "db1");
 lhs.makeUnion(rhs);
- ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
+ ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
 }

diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index bfa80b5e3b2..72904c5ab8f 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -301,7 +301,7 @@ size_t ColumnUnique<ColumnType>::getNullValueIndex() const
 template <typename ColumnType>
 size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
 {
- if (x.getType() == Field::Types::Null)
+ if (x.isNull())
 return getNullValueIndex();

 if (valuesHaveFixedSize())

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 49fcb3b222f..084a0e3e93b 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -578,6 +578,12 @@
 M(607, BACKUP_ELEMENT_DUPLICATE) \
 M(608, CANNOT_RESTORE_TABLE) \
 \
+ M(598, FUNCTION_ALREADY_EXISTS) \
+ M(599, CANNOT_DROP_SYSTEM_FUNCTION) \
+ M(600, CANNOT_CREATE_RECURSIVE_FUNCTION) \
+ M(601, OBJECT_ALREADY_STORED_ON_DISK) \
+ M(602, OBJECT_WAS_NOT_STORED_ON_DISK) \
+ \
 M(998, POSTGRESQL_CONNECTION_FAILURE) \
 M(999, KEEPER_EXCEPTION) \
 M(1000, POCO_EXCEPTION) \

diff --git a/src/Common/TLDListsHolder.cpp b/src/Common/TLDListsHolder.cpp
index 34bef8248b5..db0a762f826 100644
--- a/src/Common/TLDListsHolder.cpp
+++ b/src/Common/TLDListsHolder.cpp
@@ -64,7 +64,8 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
 while (!in.eof())
 {
 readEscapedStringUntilEOL(line, in);
- ++in.position();
+ if (!in.eof())
+ ++in.position();
 /// Skip comments
 if (line.size() > 2 && line[0] == '/' && line[1] == '/')
 continue;

diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp
index 793f7ed1352..086d462eeb2 100644
--- a/src/Common/TimerDescriptor.cpp
+++ b/src/Common/TimerDescriptor.cpp
@@ -74,17 +74,24 @@ void TimerDescriptor::drain() const
 }
 }
-void TimerDescriptor::setRelative(Poco::Timespan timespan) const
+void TimerDescriptor::setRelative(uint64_t usec) const
 {
+ static constexpr uint32_t TIMER_PRECISION = 1e6;
+
 itimerspec spec;
 spec.it_interval.tv_nsec = 0;
 spec.it_interval.tv_sec = 0;
- spec.it_value.tv_sec = timespan.totalSeconds();
- spec.it_value.tv_nsec = timespan.useconds() * 1000;
+ spec.it_value.tv_sec = usec / TIMER_PRECISION;
+ spec.it_value.tv_nsec = (usec % TIMER_PRECISION) * 1'000;

 if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr))
 throwFromErrno("Cannot set time for timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
 }
+void TimerDescriptor::setRelative(Poco::Timespan timespan) const
+{
+ setRelative(timespan.totalMicroseconds());
+}
+
 }
 #endif

diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h
index 30a610f37f0..8ca69344b53 100644
--- a/src/Common/TimerDescriptor.h
+++ b/src/Common/TimerDescriptor.h
@@ -24,6 +24,7 @@ public:
 void reset() const;
 void drain() const;
+ void setRelative(uint64_t usec) const;
 void setRelative(Poco::Timespan timespan) const;
 };

diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp
index 749f174677c..006b3fab2d8 100644
--- a/src/Compression/CompressedReadBufferBase.cpp
+++ b/src/Compression/CompressedReadBufferBase.cpp
@@ -253,4 +253,3 @@ CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper d
 }
-

diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp
index e281609ff43..e8c5b4f878d 100644
--- a/src/Compression/CompressionCodecDelta.cpp
+++ b/src/Compression/CompressionCodecDelta.cpp
@@ -82,8 +82,10 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
 }

 template <typename T>
-void decompressDataForType(const char * source, UInt32 source_size, char * dest)
+void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
 {
+ const char * output_end = dest + output_size;
+
 if (source_size % sizeof(T) != 0)
 throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot delta decompress, data size {} is not aligned to {}", source_size, sizeof(T));
@@ -92,6 +94,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
 while (source < source_end)
 {
 accumulator += unalignedLoad<T>(source);
+ if (dest + sizeof(accumulator) > output_end)
+ throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
 unalignedStore<T>(dest, accumulator);

 source += sizeof(T);
@@ -137,6 +141,7 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
 throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);

 UInt8 bytes_to_skip = uncompressed_size % bytes_size;
+ UInt32 output_size = uncompressed_size - bytes_to_skip;

 if (UInt32(2 + bytes_to_skip) > source_size)
 throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); @@ -146,16 +151,16 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_ switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; } } @@ -209,4 +214,10 @@ void registerCodecDelta(CompressionCodecFactory & factory) return std::make_shared(delta_bytes_size); }); } + +CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size) +{ + return std::make_shared(delta_bytes_size); +} + } diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index c416582eb6b..3f96cfa8ff8 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -353,12 +353,13 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) } template -void decompressDataForType(const char * source, UInt32 source_size, char * dest) +void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) { static_assert(is_unsigned_v, "ValueType must be unsigned."); using UnsignedDeltaType = ValueType; const char * source_end = source + source_size; + const char * output_end = dest + output_size; if (source + sizeof(UInt32) > source_end) return; @@ -374,6 +375,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) return; prev_value = unalignedLoad(source); + if (dest + sizeof(prev_value) > output_end) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data"); unalignedStore(dest, prev_value); source += sizeof(prev_value); @@ -385,6 +388,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) prev_delta = unalignedLoad(source); prev_value = prev_value + static_cast(prev_delta); + if (dest + sizeof(prev_value) > output_end) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data"); unalignedStore(dest, prev_value); source += sizeof(prev_delta); @@ -416,6 +421,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) const UnsignedDeltaType delta = double_delta + prev_delta; const ValueType curr_value = prev_value + delta; + if (dest + sizeof(curr_value) > output_end) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data"); unalignedStore(dest, curr_value); dest += sizeof(curr_value); @@ -507,6 +514,7 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s throw Exception("Cannot decompress. 
File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); UInt8 bytes_to_skip = uncompressed_size % bytes_size; + UInt32 output_size = uncompressed_size - bytes_to_skip; if (UInt32(2 + bytes_to_skip) > source_size) throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS); @@ -516,16 +524,16 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s switch (bytes_size) { case 1: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 2: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 4: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; case 8: - decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]); + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; } } @@ -543,4 +551,10 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory) return std::make_shared(data_bytes_size); }); } + +CompressionCodecPtr getCompressionCodecDoubleDelta(UInt8 data_bytes_size) +{ + return std::make_shared(data_bytes_size); +} + } diff --git a/src/Compression/CompressionCodecEncrypted.h b/src/Compression/CompressionCodecEncrypted.h index bacd58bcd2f..cef9b2e6072 100644 --- a/src/Compression/CompressionCodecEncrypted.h +++ b/src/Compression/CompressionCodecEncrypted.h @@ -51,7 +51,7 @@ namespace DB */ static void setMasterKey(const std::string_view & master_key); - CompressionCodecEncrypted(const std::string_view & cipher); + explicit CompressionCodecEncrypted(const std::string_view & cipher); uint8_t getMethodByte() const override; void updateHash(SipHash & hash) const override; @@ -88,7 +88,7 @@ namespace DB */ struct KeyHolder : private boost::noncopyable { - KeyHolder(const std::string_view & master_key); + explicit KeyHolder(const std::string_view & master_key); ~KeyHolder(); std::string keygen_key; @@ -99,6 +99,11 @@ namespace DB static inline std::optional keys; }; + + inline CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key) + { + return std::make_shared(master_key); + } } #endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index 396f6fad2c3..12f138dc95a 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -147,4 +147,10 @@ CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_) setCodecDescription("LZ4HC", {std::make_shared(static_cast(level))}); } + +CompressionCodecPtr getCompressionCodecLZ4(int level) +{ + return std::make_shared(level); +} + } diff --git a/src/Compression/CompressionCodecMultiple.h b/src/Compression/CompressionCodecMultiple.h index 1eb61842048..1d63fe1da55 100644 --- a/src/Compression/CompressionCodecMultiple.h +++ b/src/Compression/CompressionCodecMultiple.h @@ -9,7 +9,7 @@ class CompressionCodecMultiple final : public ICompressionCodec { public: CompressionCodecMultiple() = default; /// Need for CompressionFactory to register codec by method byte. 
- CompressionCodecMultiple(Codecs codecs_); + explicit CompressionCodecMultiple(Codecs codecs_); uint8_t getMethodByte() const override; diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index a950f1fee74..def2188d90a 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -156,4 +156,9 @@ void registerCodecZSTD(CompressionCodecFactory & factory) }); } +CompressionCodecPtr getCompressionCodecZSTD(int level) +{ + return std::make_shared(level); +} + } diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index c49c16d8bad..3f603087f2b 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -18,6 +18,8 @@ using Codecs = std::vector; class IDataType; +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); + /** * Represents interface for compression codecs like LZ4, ZSTD, etc. */ @@ -84,6 +86,8 @@ public: virtual bool isNone() const { return false; } protected: + /// This is used for fuzz testing + friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); /// Return size of compressed data without header virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; } diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index 28a285f00f4..1aeae6b1b9a 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -450,7 +450,11 @@ bool NO_INLINE decompressImpl( const unsigned token = *ip++; length = token >> 4; if (length == 0x0F) + { + if (unlikely(ip + 1 >= input_end)) + return false; continue_read_length(); + } /// Copy literals. @@ -470,6 +474,20 @@ bool NO_INLINE decompressImpl( if (unlikely(copy_end > output_end)) return false; + // Due to implementation specifics the copy length is always a multiple of copy_amount + size_t real_length = 0; + + static_assert(copy_amount == 8 || copy_amount == 16 || copy_amount == 32); + if constexpr (copy_amount == 8) + real_length = (((length >> 3) + 1) * 8); + else if constexpr (copy_amount == 16) + real_length = (((length >> 4) + 1) * 16); + else if constexpr (copy_amount == 32) + real_length = (((length >> 5) + 1) * 32); + + if (unlikely(ip + real_length >= input_end + ADDITIONAL_BYTES_AT_END_OF_BUFFER)) + return false; + wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. if (copy_end == output_end) @@ -494,7 +512,11 @@ bool NO_INLINE decompressImpl( length = token & 0x0F; if (length == 0x0F) + { + if (unlikely(ip + 1 >= input_end)) + return false; continue_read_length(); + } length += 4; /// Copy match within block, that produce overlapping pattern. Match may replicate itself. diff --git a/src/Compression/fuzzers/CMakeLists.txt b/src/Compression/fuzzers/CMakeLists.txt index 74bf2d2649b..189aea66a92 100644 --- a/src/Compression/fuzzers/CMakeLists.txt +++ b/src/Compression/fuzzers/CMakeLists.txt @@ -1,2 +1,20 @@ +# Our code has strong cohesion and the target associated with `Compression` also depends on `DataTypes`. +# But we can exclude some files which have dependencies in the case of a +# fuzzer-related build (we are interested in fuzzing only a particular part of our code). +# So, some symbols will be declared, but not defined. Unfortunately, this trick doesn't work with UBSan.
+# If you want a really small resulting binary, just link with fuzz_compression and clickhouse_common_io + add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp) -target_link_libraries (compressed_buffer_fuzzer PRIVATE fuzz_compression clickhouse_common_io ${LIB_FUZZING_ENGINE}) +target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) + +add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp) +target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms lz4 ${LIB_FUZZING_ENGINE}) + +add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp) +target_link_libraries (delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) + +add_executable (double_delta_decompress_fuzzer double_delta_decompress_fuzzer.cpp) +target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) + +add_executable (encrypted_decompress_fuzzer encrypted_decompress_fuzzer.cpp) +target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Compression/fuzzers/compressed_buffer_fuzzer.cpp b/src/Compression/fuzzers/compressed_buffer_fuzzer.cpp index a87046eff5c..1f669696fb9 100644 --- a/src/Compression/fuzzers/compressed_buffer_fuzzer.cpp +++ b/src/Compression/fuzzers/compressed_buffer_fuzzer.cpp @@ -17,6 +17,5 @@ try } catch (...) { - std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; return 1; } diff --git a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp new file mode 100644 index 00000000000..b039777da15 --- /dev/null +++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp @@ -0,0 +1,44 @@ +#include +#include + +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); +} + +struct AuxiliaryRandomData +{ + UInt8 delta_size_bytes; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try +{ + if (size < sizeof(AuxiliaryRandomData)) + return 0; + + const auto * p = reinterpret_cast(data); + auto codec = DB::getCompressionCodecDelta(p->delta_size_bytes); + + size_t output_buffer_size = p->decompressed_size % 65536; + size -= sizeof(AuxiliaryRandomData); + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); + + // std::string input = std::string(reinterpret_cast(data), size); + // fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); + + DB::Memory<> memory; + memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...)
+{ + return 1; +} diff --git a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp new file mode 100644 index 00000000000..f9822daa3bd --- /dev/null +++ b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp @@ -0,0 +1,44 @@ +#include +#include + +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecDoubleDelta(UInt8 data_bytes_size); +} + +struct AuxiliaryRandomData +{ + UInt8 data_bytes_size; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try +{ + if (size < sizeof(AuxiliaryRandomData)) + return 0; + + const auto * p = reinterpret_cast(data); + auto codec = DB::getCompressionCodecDoubleDelta(p->data_bytes_size); + + size_t output_buffer_size = p->decompressed_size % 65536; + size -= sizeof(AuxiliaryRandomData); + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); + + // std::string input = std::string(reinterpret_cast(data), size); + // fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); + + DB::Memory<> memory; + memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...) +{ + return 1; +} diff --git a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp new file mode 100644 index 00000000000..6211fd4b9cc --- /dev/null +++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp @@ -0,0 +1,52 @@ +#include +#include + +#include +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key); +} + +constexpr size_t key_size = 20; + +struct AuxiliaryRandomData +{ + char key[key_size]; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try +{ + if (size < sizeof(AuxiliaryRandomData)) + return 0; + + const auto * p = reinterpret_cast(data); + + std::string key = std::string(p->key, key_size); + auto codec = DB::getCompressionCodecEncrypted(key); + + size_t output_buffer_size = p->decompressed_size % 65536; + size -= sizeof(AuxiliaryRandomData); + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); + + std::string input = std::string(reinterpret_cast(data), size); + fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); + + if (output_buffer_size < size) + return 0; + + DB::Memory<> memory; + memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...) 
+{ + return 1; +} diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp new file mode 100644 index 00000000000..85c4c9bd329 --- /dev/null +++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp @@ -0,0 +1,47 @@ +#include +#include + +#include +#include +#include + +namespace DB +{ + CompressionCodecPtr getCompressionCodecLZ4(int level); +} + +struct AuxiliaryRandomData +{ + size_t level; + size_t decompressed_size; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try +{ + + if (size < sizeof(AuxiliaryRandomData) + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER) + return 0; + + const auto * p = reinterpret_cast(data); + auto codec = DB::getCompressionCodecLZ4(p->level); + + size_t output_buffer_size = p->decompressed_size % 65536; + size -= sizeof(AuxiliaryRandomData); + size -= LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; + data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t); + + // std::string input = std::string(reinterpret_cast(data), size); + // fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size); + + DB::Memory<> memory; + memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + + codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + + return 0; +} +catch (...) +{ + return 1; +} diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 6ec9b17d0a7..df5d8792b25 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -165,10 +165,11 @@ public: while (!read_buf.eof()) { result.last_position = read_buf.count(); + /// Read checksum Checksum record_checksum; readIntBinary(record_checksum, read_buf); - /// Initialization is required, otherwise checksums may fail + /// Read header ChangelogRecord record; readIntBinary(record.header.version, read_buf); readIntBinary(record.header.index, read_buf); @@ -179,6 +180,7 @@ public: if (record.header.version > CURRENT_CHANGELOG_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath); + /// Read data if (record.header.blob_size != 0) { auto buffer = nuraft::buffer::alloc(record.header.blob_size); @@ -189,11 +191,13 @@ public: else record.blob = nullptr; + /// Check changelog integrity if (previous_index != 0 && previous_index + 1 != record.header.index) throw Exception(ErrorCodes::CORRUPTED_DATA, "Previous log entry {}, next log entry {}, seems like some entries skipped", previous_index, record.header.index); previous_index = record.header.index; + /// Compare checksums Checksum checksum = computeRecordChecksum(record); if (checksum != record_checksum) { @@ -202,22 +206,25 @@ public: filepath, record.header.version, record.header.index, record.header.blob_size); } + /// Check for duplicated changelog ids if (logs.count(record.header.index) != 0) throw Exception(ErrorCodes::CORRUPTED_DATA, "Duplicated index id {} in log {}", record.header.index, filepath); result.entries_read += 1; + /// Read but skip this entry because our state is already fresher if (record.header.index < start_log_index) - { continue; - } + /// Create log entry for read data auto log_entry = nuraft::cs_new(record.header.term, record.blob, record.header.value_type); if (result.first_read_index == 0) result.first_read_index = record.header.index; + /// Put it into the in-memory structure logs.emplace(record.header.index, log_entry);
index_to_offset[record.header.index] = result.last_position; + if (result.entries_read % 50000 == 0) LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.entries_read); } @@ -235,6 +242,7 @@ public: result.error = true; tryLogCurrentException(log); } + LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.entries_read); return result; @@ -255,6 +263,7 @@ Changelog::Changelog( , force_sync(force_sync_) , log(log_) { + /// Load all files in changelog directory namespace fs = std::filesystem; if (!fs::exists(changelogs_dir)) fs::create_directories(changelogs_dir); @@ -264,45 +273,70 @@ Changelog::Changelog( auto file_description = getChangelogFileDescription(p.path()); existing_changelogs[file_description.from_log_index] = file_description; } + + if (existing_changelogs.empty()) + LOG_WARNING(log, "No logs exist in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir); } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) { uint64_t total_read = 0; + + /// Number of entries in the last log uint64_t entries_in_last = 0; - uint64_t incomplete_log_index = 0; + /// Log idx of the first incomplete log (key in existing_changelogs) + int64_t first_incomplete_log_start_index = -1; /// if -1 then no incomplete log exists + ChangelogReadResult result{}; + /// First log index which was read from all changelogs uint64_t first_read_index = 0; + /// We must start to read from this log index uint64_t start_to_read_from = last_commited_log_index; + + /// If we need to keep some reserved logs, read additional `logs_to_keep` logs if (start_to_read_from > logs_to_keep) start_to_read_from -= logs_to_keep; else start_to_read_from = 1; + /// Whether we have read at least something bool started = false; + + /// Go through changelog files in order of start_index for (const auto & [changelog_start_index, changelog_description] : existing_changelogs) { - entries_in_last = changelog_description.to_log_index - changelog_description.from_log_index + 1; + /// How many entries we have in the last changelog + entries_in_last = changelog_description.expectedEntriesCountInLog(); + /// [from_log_index.>=.......start_to_read_from.....<=.to_log_index] if (changelog_description.to_log_index >= start_to_read_from) { - if (!started) + if (!started) /// still nothing was read { + /// Our first log starts from a fresher log_id than the one we were required to read, and this changelog is not an empty log. + /// So we are missing something in our logs, but it's not data loss: we will receive a snapshot and the required + /// entries from the leader. if (changelog_description.from_log_index > last_commited_log_index && (changelog_description.from_log_index - last_commited_log_index) > 1) { LOG_ERROR(log, "Some records were lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index); - incomplete_log_index = changelog_start_index; + first_incomplete_log_start_index = changelog_start_index; break; } else if (changelog_description.from_log_index > start_to_read_from) + { + /// We don't have the required amount of reserved logs, but nothing was lost. LOG_WARNING(log, "Don't have required amount of reserved log records. 
Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index); + } } - started = true; ChangelogReader reader(changelog_description.path); result = reader.readChangelog(logs, start_to_read_from, index_to_start_pos, log); + + started = true; + + /// Otherwise we have already initialized it if (first_read_index == 0) first_read_index = result.first_read_index; @@ -311,7 +345,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin /// May happen after truncate, crash or simply unfinished log if (result.entries_read < entries_in_last) { - incomplete_log_index = changelog_start_index; + first_incomplete_log_start_index = changelog_start_index; break; } } @@ -319,14 +353,16 @@ if (first_read_index != 0) start_index = first_read_index; - else + else /// We may just have no logs (only a snapshot) start_index = last_commited_log_index; - if (incomplete_log_index != 0) + /// Found some broken or unfinished logs + /// We have to remove broken data and continue to write into the incomplete log. + if (first_incomplete_log_start_index != -1) /// otherwise all logs are complete, so just start a new one { auto start_remove_from = existing_changelogs.begin(); if (started) - start_remove_from = existing_changelogs.upper_bound(incomplete_log_index); + start_remove_from = existing_changelogs.upper_bound(first_incomplete_log_start_index); /// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them. for (auto itr = start_remove_from; itr != existing_changelogs.end();) @@ -340,6 +376,9 @@ if (!existing_changelogs.empty()) { auto description = existing_changelogs.rbegin()->second; + if (description.expectedEntriesCountInLog() != rotate_interval) + LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description.expectedEntriesCountInLog()); + LOG_TRACE(log, "Continue to write into {}", description.path); current_writer = std::make_unique(description.path, WriteMode::Append, description.from_log_index); current_writer->setEntriesWritten(result.entries_read); @@ -363,6 +402,7 @@ void Changelog::rotate(uint64_t new_start_log_index) /// Flush previous log flush(); + /// Start a new one ChangelogFileDescription new_description; new_description.prefix = DEFAULT_PREFIX; new_description.from_log_index = new_start_log_index; @@ -378,7 +418,7 @@ ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_entry) { ChangelogRecord record; - record.header.version = ChangelogVersion::V0; + record.header.version = ChangelogVersion::V1; record.header.index = index; record.header.term = log_entry->get_term(); record.header.value_type = log_entry->get_val_type(); @@ -401,10 +441,13 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) if (logs.empty()) start_index = index; - if (current_writer->getEntriesWritten() == rotate_interval) + const auto & current_changelog_description = existing_changelogs[current_writer->getStartIndex()]; + const bool log_is_complete = current_writer->getEntriesWritten() == current_changelog_description.expectedEntriesCountInLog(); + + if (log_is_complete) rotate(index); - auto offset = current_writer->appendRecord(buildRecord(index, log_entry)); + 
const auto offset = current_writer->appendRecord(buildRecord(index, log_entry)); if (!index_to_start_pos.try_emplace(index, offset).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index); @@ -416,26 +459,31 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) if (index_to_start_pos.count(index) == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index); - bool go_to_previous_file = index < current_writer->getStartIndex(); + /// This write_at requires overwriting everything in this file and also in previous file(s) + const bool go_to_previous_file = index < current_writer->getStartIndex(); + if (go_to_previous_file) { auto index_changelog = existing_changelogs.lower_bound(index); + ChangelogFileDescription description; + + if (index_changelog->first == index) /// exactly this file starts from index description = index_changelog->second; else description = std::prev(index_changelog)->second; + /// Initialize writer from this log file current_writer = std::make_unique(description.path, WriteMode::Append, index_changelog->first); current_writer->setEntriesWritten(description.to_log_index - description.from_log_index + 1); } - auto entries_written = current_writer->getEntriesWritten(); + /// Truncate current file current_writer->truncateToLength(index_to_start_pos[index]); if (go_to_previous_file) { - /// Remove all subsequent files + /// Remove all subsequent files if we overwrote something in a previous one auto to_remove_itr = existing_changelogs.upper_bound(index); for (auto itr = to_remove_itr; itr != existing_changelogs.end();) { @@ -444,19 +492,22 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) } } + auto entries_written = current_writer->getEntriesWritten(); /// Remove redundant logs from memory + /// Everything >= index must be removed for (uint64_t i = index; ; ++i) { auto log_itr = logs.find(i); if (log_itr == logs.end()) break; + logs.erase(log_itr); index_to_start_pos.erase(i); entries_written--; } - current_writer->setEntriesWritten(entries_written); + /// Now we can actually overwrite the entry at index appendEntry(index, log_entry); } @@ -467,7 +518,6 @@ void Changelog::compact(uint64_t up_to_log_index) /// Remove all completely outdated changelog files if (itr->second.to_log_index <= up_to_log_index) { - LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path); std::erase_if(index_to_start_pos, [right_index = itr->second.to_log_index] (const auto & item) { return item.first <= right_index; }); std::filesystem::remove(itr->second.path); @@ -482,9 +532,10 @@ LogEntryPtr Changelog::getLastEntry() const { + /// This entry is treated in a special way by NuRaft static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(uint64_t))); - uint64_t next_index = getNextEntryIndex() - 1; + const uint64_t next_index = getNextEntryIndex() - 1; auto entry = logs.find(next_index); if (entry == logs.end()) return fake_entry; diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 893fe16abdf..119fc91c2c7 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -53,13 +53,19 @@ struct ChangelogFileDescription uint64_t to_log_index; std::string path; + + /// How many entries should be stored in this log + uint64_t expectedEntriesCountInLog() const + { + return to_log_index - from_log_index 
+ 1; + } }; class ChangelogWriter; /// Simplest changelog with files rotation. -/// No compression, no metadata, just entries with headers one by one -/// Able to read broken files/entries and discard them. +/// No compression, no metadata, just entries with headers one by one. +/// Able to read broken files/entries and discard them. Not thread safe. class Changelog { @@ -128,10 +134,16 @@ private: const bool force_sync; Poco::Logger * log; + /// Currently existing changelogs std::map existing_changelogs; + + /// Current writer for changelog file std::unique_ptr current_writer; + /// Mapping log_id -> binary offset in log file IndexToOffset index_to_start_pos; + /// Mapping log_id -> log_entry IndexToLogEntry logs; + /// Start log_id which exists in all "active" logs uint64_t start_index = 0; }; diff --git a/src/Coordination/KeeperStorageDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp similarity index 86% rename from src/Coordination/KeeperStorageDispatcher.cpp rename to src/Coordination/KeeperDispatcher.cpp index 7c416b38d8b..26db925b4c5 100644 --- a/src/Coordination/KeeperStorageDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -9,19 +9,18 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int TIMEOUT_EXCEEDED; } -KeeperStorageDispatcher::KeeperStorageDispatcher() +KeeperDispatcher::KeeperDispatcher() : coordination_settings(std::make_shared()) , log(&Poco::Logger::get("KeeperDispatcher")) { } -void KeeperStorageDispatcher::requestThread() +void KeeperDispatcher::requestThread() { setThreadName("KeeperReqT"); @@ -133,7 +132,7 @@ void KeeperStorageDispatcher::requestThread() } } -void KeeperStorageDispatcher::responseThread() +void KeeperDispatcher::responseThread() { setThreadName("KeeperRspT"); while (!shutdown_called) @@ -159,7 +158,7 @@ void KeeperStorageDispatcher::responseThread() } } -void KeeperStorageDispatcher::snapshotThread() +void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); while (!shutdown_called) @@ -181,9 +180,11 @@ void KeeperStorageDispatcher::snapshotThread() } } -void KeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) +void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) { std::lock_guard lock(session_to_response_callback_mutex); + + /// Special new session response. 
if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::SessionID) { const Coordination::ZooKeeperSessionIDResponse & session_id_resp = dynamic_cast(*response); @@ -196,25 +197,28 @@ callback(response); new_session_id_response_callback.erase(session_id_resp.internal_id); } - else + else /// Normal response, just write to client { - auto session_writer = session_to_response_callback.find(session_id); - if (session_writer == session_to_response_callback.end()) + auto session_response_callback = session_to_response_callback.find(session_id); + + /// Session was disconnected, just skip this response + if (session_response_callback == session_to_response_callback.end()) return; - session_writer->second(response); + session_response_callback->second(response); /// Session closed, no more writes if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close) { - session_to_response_callback.erase(session_writer); + session_to_response_callback.erase(session_response_callback); } } } -bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id) +bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id) { { + /// If the session was already disconnected, then we will ignore requests std::lock_guard lock(session_to_response_callback_mutex); if (session_to_response_callback.count(session_id) == 0) return false; @@ -237,7 +241,7 @@ return true; } -void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper) +void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper) { LOG_DEBUG(log, "Initializing storage dispatcher"); int myid = config.getInt("keeper_server.server_id"); @@ -251,6 +255,7 @@ server = std::make_unique( myid, coordination_settings, config, responses_queue, snapshots_queue, standalone_keeper); + try { LOG_DEBUG(log, "Waiting server to initialize"); @@ -266,13 +271,13 @@ throw; } - + /// Start it after the keeper server start session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); }); LOG_DEBUG(log, "Dispatcher initialized"); } -void KeeperStorageDispatcher::shutdown() +void KeeperDispatcher::shutdown() { try { @@ -306,6 +311,8 @@ server->shutdown(); KeeperStorage::RequestForSession request_for_session; + + /// Set session expired for all pending requests while (requests_queue->tryPop(request_for_session)) { if (request_for_session.request) @@ -320,6 +327,7 @@ } } + /// Clear all registered sessions std::lock_guard lock(session_to_response_callback_mutex); session_to_response_callback.clear(); } @@ -331,19 +339,19 @@ LOG_DEBUG(log, "Dispatcher shut down"); } -KeeperStorageDispatcher::~KeeperStorageDispatcher() +KeeperDispatcher::~KeeperDispatcher() { shutdown(); } -void KeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback) +void KeeperDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback) { 
std::lock_guard lock(session_to_response_callback_mutex); if (!session_to_response_callback.try_emplace(session_id, callback).second) throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id); } -void KeeperStorageDispatcher::sessionCleanerTask() +void KeeperDispatcher::sessionCleanerTask() { while (true) { @@ -352,12 +360,16 @@ try { + /// Only the leader node must check for dead sessions if (isLeader()) { auto dead_sessions = server->getDeadSessions(); + for (int64_t dead_session : dead_sessions) { LOG_INFO(log, "Found dead session {}, will try to close it", dead_session); + + /// Close session == send close request to raft server Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = Coordination::CLOSE_XID; KeeperStorage::RequestForSession request_info; @@ -367,6 +379,8 @@ std::lock_guard lock(push_request_mutex); requests_queue->push(std::move(request_info)); } + + /// Remove session from registered sessions finishSession(dead_session); LOG_INFO(log, "Dead session close request pushed"); } @@ -381,7 +395,7 @@ } } -void KeeperStorageDispatcher::finishSession(int64_t session_id) +void KeeperDispatcher::finishSession(int64_t session_id) { std::lock_guard lock(session_to_response_callback_mutex); auto session_it = session_to_response_callback.find(session_id); @@ -389,7 +403,7 @@ session_to_response_callback.erase(session_it); } -void KeeperStorageDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error) +void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error) { for (const auto & [session_id, request] : requests_for_sessions) { @@ -402,7 +416,7 @@ } } -void KeeperStorageDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) +void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) { if (!result->has_result()) result->get(); @@ -417,10 +431,14 @@ requests_for_sessions.clear(); } -int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms) +int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { + /// New session id allocation is a special request, because we cannot process it in the normal + /// way: get request -> put to raft -> set response for registered callback. KeeperStorage::RequestForSession request_info; std::shared_ptr request = std::make_shared(); + /// Internal session id. It's a temporary number which is unique for each client on this server + /// but can be the same on different servers. 
request->internal_id = internal_session_id_counter.fetch_add(1); request->session_timeout_ms = session_timeout_ms; request->server_id = server->getServerID(); @@ -430,6 +448,7 @@ auto promise = std::make_shared>(); auto future = promise->get_future(); + { std::lock_guard lock(session_to_response_callback_mutex); new_session_id_response_callback[request->internal_id] = [promise, internal_id = request->internal_id] (const Coordination::ZooKeeperResponsePtr & response) @@ -452,6 +471,7 @@ }; } + /// Push new session request to queue { std::lock_guard lock(push_request_mutex); if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) @@ -461,6 +481,8 @@ if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready) throw Exception("Cannot receive session id within session timeout", ErrorCodes::TIMEOUT_EXCEEDED); + /// Forcefully wait for request execution because we cannot process any other + /// requests for this client until it gets a new session id. return future.get(); } diff --git a/src/Coordination/KeeperStorageDispatcher.h b/src/Coordination/KeeperDispatcher.h similarity index 80% rename from src/Coordination/KeeperStorageDispatcher.h rename to src/Coordination/KeeperDispatcher.h index cc95de04ce9..a20603f12c5 100644 --- a/src/Coordination/KeeperStorageDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -22,7 +22,9 @@ namespace DB using ZooKeeperResponseCallback = std::function; -class KeeperStorageDispatcher +/// High-level wrapper for ClickHouse Keeper. +/// Processes user requests via consensus and returns responses. +class KeeperDispatcher { private: @@ -45,6 +47,7 @@ /// (get, set, list, etc.). Dispatcher determines callback for each response /// using session id from this map. SessionToResponseCallback session_to_response_callback; + /// But when client connects to the server for the first time it doesn't /// have session_id. It requests it from the server. We give a temporary /// internal id to such requests just to match the client with its response. @@ -60,7 +63,7 @@ /// Dumping new snapshots to disk ThreadFromGlobalPool snapshot_thread; - /// RAFT wrapper. Most important class. + /// RAFT wrapper. std::unique_ptr server; Poco::Logger * log; @@ -69,10 +72,15 @@ std::atomic internal_session_id_counter{0}; private: + /// Thread that puts requests to raft void requestThread(); + /// Thread that puts responses for subscribed sessions void responseThread(); + /// Thread that cleans disconnected sessions from memory void sessionCleanerTask(); + /// Thread that creates snapshots in the background void snapshotThread(); + void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); /// Add error responses for requests to responses queue. @@ -84,16 +92,23 @@ void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); public: - KeeperStorageDispatcher(); + /// Just allocates some objects; real initialization is done by the `initialize` method + KeeperDispatcher(); + /// Calls shutdown + ~KeeperDispatcher(); + + /// Initialization from config. 
+ /// standalone_keeper -- we are a standalone keeper application (not inside the clickhouse server) void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper); + /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); - ~KeeperStorageDispatcher(); - + /// Put request to ClickHouse Keeper bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id); + /// Are we leader bool isLeader() const { return server->isLeader(); @@ -104,9 +119,12 @@ return server->isLeaderAlive(); } + /// Get new session ID int64_t getSessionID(int64_t session_timeout_ms); + /// Register session and subscribe for responses with callback void registerSession(int64_t session_id, ZooKeeperResponseCallback callback); + /// Call when we no longer need any responses for this session (the session has expired) void finishSession(int64_t session_id); }; diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index 01315e6e879..d8ac8330c05 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -9,39 +9,53 @@ namespace DB { +/// Wrapper around Changelog class. Implements RAFT log storage. class KeeperLogStore : public nuraft::log_store { public: KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_); + /// Read log storage from filesystem starting from last_commited_log_index void init(uint64_t last_commited_log_index, uint64_t logs_to_keep); uint64_t start_index() const override; uint64_t next_slot() const override; + /// Return the last entry from the log nuraft::ptr last_entry() const override; + /// Append new entry to log uint64_t append(nuraft::ptr & entry) override; + /// Remove all entries starting from index and write entry into index position void write_at(uint64_t index, nuraft::ptr & entry) override; + /// Return entries between [start, end) nuraft::ptr>> log_entries(uint64_t start, uint64_t end) override; + /// Return entry at index nuraft::ptr entry_at(uint64_t index) override; + /// Term of the index uint64_t term_at(uint64_t index) override; + /// Serialize entries in interval [index, index + cnt) nuraft::ptr pack(uint64_t index, int32_t cnt) override; + /// Apply serialized entries starting from index void apply_pack(uint64_t index, nuraft::buffer & pack) override; + /// Entries from last_log_index can be removed from memory and from disk bool compact(uint64_t last_log_index) override; + /// Call fsync on the stored data bool flush() override; + /// Current log storage size uint64_t size() const; + /// Flush batch of appended entries void end_of_append_batch(uint64_t start_index, uint64_t count) override; private: diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 282a7b48dfb..d1138ccef1a 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -38,6 +38,8 @@ private: Poco::Logger * log; + /// Callback func which is called by NuRaft on all internal events. + /// Used to determine the moment when raft is ready to serve new requests nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param); /// Almost copy-paste from nuraft::launcher, but with separated server init and start @@ -57,18 +59,25 @@ public: SnapshotsQueue & snapshots_queue_, bool standalone_keeper); + /// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings. 
void startup(); + /// Put a local read request, execute it directly in the state machine, and put the response into + /// the responses queue void putLocalReadRequest(const KeeperStorage::RequestForSession & request); + /// Put a batch of requests into Raft and get the result of the put. Responses will be set separately into + /// responses_queue. RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests); + /// Return the set of non-active sessions std::unordered_set getDeadSessions(); bool isLeader() const; bool isLeaderAlive() const; + /// Wait for server initialization (see callbackFunc) void waitInit(); void shutdown(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a76b86a8171..2e5e7214e3e 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -14,29 +14,32 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; } -KeeperStorage::RequestForSession parseRequest(nuraft::buffer & data) +namespace { - ReadBufferFromNuraftBuffer buffer(data); - KeeperStorage::RequestForSession request_for_session; - readIntBinary(request_for_session.session_id, buffer); + KeeperStorage::RequestForSession parseRequest(nuraft::buffer & data) + { + ReadBufferFromNuraftBuffer buffer(data); + KeeperStorage::RequestForSession request_for_session; + readIntBinary(request_for_session.session_id, buffer); - int32_t length; - Coordination::read(length, buffer); + int32_t length; + Coordination::read(length, buffer); - int32_t xid; - Coordination::read(xid, buffer); + int32_t xid; + Coordination::read(xid, buffer); - Coordination::OpNum opnum; + Coordination::OpNum opnum; - Coordination::read(opnum, buffer); + Coordination::read(opnum, buffer); - request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum); - request_for_session.request->xid = xid; - request_for_session.request->readImpl(buffer); - return request_for_session; + request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum); + request_for_session.request->xid = xid; + request_for_session.request->readImpl(buffer); + return request_for_session; + } } - KeeperStateMachine::KeeperStateMachine( +KeeperStateMachine::KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, @@ -58,6 +61,7 @@ void KeeperStateMachine::init() LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots()); bool loaded = false; bool has_snapshots = snapshot_manager.totalSnapshots() != 0; + /// Deserialize latest snapshot from disk while (snapshot_manager.totalSnapshots() != 0) { uint64_t latest_log_index = snapshot_manager.getLatestSnapshotIndex(); @@ -97,6 +101,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data); + /// Special processing of session_id request if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) { const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast(*request_for_session.request); @@ -136,7 +141,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx()); nuraft::ptr latest_snapshot_ptr; - { + { /// save snapshot into memory std::lock_guard lock(snapshots_lock); if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Required to 
apply snapshot with last log index {}, but our last log index is {}", @@ -144,10 +149,11 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) latest_snapshot_ptr = latest_snapshot_buf; } - { + { /// deserialize and apply snapshot to storage std::lock_guard lock(storage_lock); std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); } + last_committed_idx = s.get_last_log_idx(); return true; } @@ -168,18 +174,19 @@ void KeeperStateMachine::create_snapshot( nuraft::ptr snp_buf = s.serialize(); auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; - { + { /// lock storage for a short period of time to turn on "snapshot mode". After that we can read consistent storage state without locking. std::lock_guard lock(storage_lock); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy); } + /// create snapshot task for background execution (in snapshot thread) snapshot_task.create_snapshot = [this, when_done] (KeeperStorageSnapshotPtr && snapshot) { nuraft::ptr exception(nullptr); bool ret = true; try { - { + { /// Read storage data without locks and create snapshot std::lock_guard lock(snapshots_lock); auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); @@ -192,6 +199,7 @@ { /// Must do it with lock (clearing elements from list) std::lock_guard lock(storage_lock); + /// Turn off "snapshot mode" and clear outdated part of storage state storage->clearGarbageAfterSnapshot(); /// Destroy snapshot with lock snapshot.reset(); @@ -209,7 +217,9 @@ when_done(ret, exception); }; + LOG_DEBUG(log, "In memory snapshot {} created, queueing task to flush to disk", s.get_last_log_idx()); + /// Flush snapshot to disk in a separate thread. snapshots_queue.push(std::move(snapshot_task)); } @@ -224,7 +234,7 @@ void KeeperStateMachine::save_logical_snp_obj( nuraft::ptr cloned_buffer; nuraft::ptr cloned_meta; - if (obj_id == 0) + if (obj_id == 0) /// Fake snapshot required by NuRaft at startup { std::lock_guard lock(storage_lock); KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx()); @@ -232,15 +242,18 @@ } else { + /// copy snapshot into memory cloned_buffer = nuraft::buffer::clone(data); } + /// copy snapshot meta into memory nuraft::ptr snp_buf = s.serialize(); cloned_meta = nuraft::snapshot::deserialize(*snp_buf); try { std::lock_guard lock(snapshots_lock); + /// Serialize snapshot to disk and switch in-memory pointers. auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, s.get_last_log_idx()); latest_snapshot_buf = cloned_buffer; latest_snapshot_meta = cloned_meta; @@ -262,7 +275,7 @@ int KeeperStateMachine::read_logical_snp_obj( { LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id); - if (obj_id == 0) + if (obj_id == 0) /// Fake snapshot required by NuRaft at startup { data_out = nuraft::buffer::alloc(sizeof(int32_t)); nuraft::buffer_serializer bs(data_out); @@ -272,6 +285,8 @@ else { std::lock_guard lock(snapshots_lock); + /// Our snapshot is not equal to the required one. Maybe we are still creating it in the background. + /// Let's wait and NuRaft will retry this call. 
if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx()) { LOG_WARNING(log, "Required to apply snapshot with last log index {}, but our last log index is {}. Will ignore this one and retry", @@ -281,11 +296,13 @@ data_out = nuraft::buffer::clone(*latest_snapshot_buf); is_last_obj = true; } + return 1; } void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { + /// Pure local request, just process it with storage KeeperStorage::ResponsesForSessions responses; { std::lock_guard lock(storage_lock); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index fb46f507baf..06be270b66e 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -13,6 +13,8 @@ namespace DB using ResponsesQueue = ThreadSafeQueue; using SnapshotsQueue = ConcurrentBoundedQueue; +/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. +/// Responsible for entry commits, snapshot creation and so on. class KeeperStateMachine : public nuraft::state_machine { public: @@ -21,24 +23,30 @@ const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_, const std::string & superdigest_ = ""); + /// Read state from the latest snapshot void init(); + /// Currently not supported nuraft::ptr pre_commit(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; } nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; + /// Currently not supported void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {} uint64_t last_commit_index() override { return last_committed_idx; } + /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. bool apply_snapshot(nuraft::snapshot & s) override; nuraft::ptr last_snapshot() override; + /// Create new snapshot from current state. void create_snapshot( nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override; + /// Save snapshot which was sent to us by the leader. After that we will apply it in apply_snapshot. void save_logical_snp_obj( nuraft::snapshot & s, uint64_t & obj_id, @@ -46,6 +54,8 @@ bool is_first_obj, bool is_last_obj) override; + /// A better name would be `serialize snapshot` -- save existing snapshot (created by create_snapshot) into + /// in-memory buffer data_out. int read_logical_snp_obj( nuraft::snapshot & s, void* & user_snp_ctx, @@ -58,6 +68,7 @@ return *storage; } + /// Process local read request void processReadRequest(const KeeperStorage::RequestForSession & request_for_session); std::unordered_set getDeadSessions(); @@ -66,18 +77,25 @@ private: + /// In our state machine we always have a single snapshot which is stored + /// in memory in compressed (serialized) format. SnapshotMetadataPtr latest_snapshot_meta = nullptr; nuraft::ptr latest_snapshot_buf = nullptr; CoordinationSettingsPtr coordination_settings; + /// Main state machine logic KeeperStoragePtr storage; + /// Save/Load and Serialize/Deserialize logic for snapshots. KeeperSnapshotManager snapshot_manager; + /// Put processed responses into this queue ResponsesQueue & responses_queue; + /// Snapshots to be created by the snapshot thread SnapshotsQueue & snapshots_queue; + /// Mutex for snapshots std::mutex snapshots_lock; @@ -88,6 +106,7 @@ std::atomic last_committed_idx; Poco::Logger * log; + /// Special part of ACL system -- superdigest specified in server config. 
const std::string superdigest; }; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 320754c7d31..3053ce17ad1 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -151,19 +151,39 @@ static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & pat } auto parent_path = parentPath(path); - it = list_watches.find(parent_path); - if (it != list_watches.end()) - { - std::shared_ptr watch_list_response = std::make_shared(); - watch_list_response->path = parent_path; - watch_list_response->xid = Coordination::WATCH_XID; - watch_list_response->zxid = -1; - watch_list_response->type = Coordination::Event::CHILD; - watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); - list_watches.erase(it); + Strings paths_to_check_for_list_watches; + if (event_type == Coordination::Event::CREATED) + { + paths_to_check_for_list_watches.push_back(parent_path); /// Trigger list watches for parent + } + else if (event_type == Coordination::Event::DELETED) + { + paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path + paths_to_check_for_list_watches.push_back(parent_path); /// And for parent path + } + /// CHANGED events never trigger list watches + for (const auto & path_to_check : paths_to_check_for_list_watches) + { + it = list_watches.find(path_to_check); + if (it != list_watches.end()) + { + std::shared_ptr watch_list_response = std::make_shared(); + watch_list_response->path = path_to_check; + watch_list_response->xid = Coordination::WATCH_XID; + watch_list_response->zxid = -1; + if (path_to_check == parent_path) + watch_list_response->type = Coordination::Event::CHILD; + else + watch_list_response->type = Coordination::Event::DELETED; + + watch_list_response->state = Coordination::State::CONNECTED; + for (auto watcher_session : it->second) + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + + list_watches.erase(it); + } } return result; } @@ -177,32 +197,32 @@ KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_) using Undo = std::function; -struct KeeperStorageRequest +struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; - explicit KeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_) + explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) {} virtual std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id) const = 0; virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; } virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } - virtual ~KeeperStorageRequest() = default; + virtual ~KeeperStorageRequestProcessor() = default; }; -struct KeeperStorageHeartbeatRequest final : public KeeperStorageRequest +struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */) const override { return {zk_request->makeResponse(), {}}; } }; -struct 
KeeperStorageSyncRequest final : public KeeperStorageRequest +struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */) const override { auto response = zk_request->makeResponse(); @@ -212,9 +232,9 @@ struct KeeperStorageSyncRequest final : public KeeperStorageRequest } }; -struct KeeperStorageCreateRequest final : public KeeperStorageRequest +struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { @@ -363,7 +383,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest } }; -struct KeeperStorageGetRequest final : public KeeperStorageRequest +struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override @@ -381,7 +401,7 @@ struct KeeperStorageGetRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Read, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /* zxid */, int64_t /* session_id */) const override { auto & container = storage.container; @@ -423,7 +443,7 @@ namespace } } -struct KeeperStorageRemoveRequest final : public KeeperStorageRequest +struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -440,7 +460,7 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Delete, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/) const override { auto & container = storage.container; @@ -520,9 +540,9 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest } }; -struct KeeperStorageExistsRequest final : public KeeperStorageRequest +struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */) const override { auto & container = storage.container; @@ -546,7 +566,7 @@ struct KeeperStorageExistsRequest final : public KeeperStorageRequest } }; -struct KeeperStorageSetRequest final : public KeeperStorageRequest +struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -563,7 +583,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Write, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & 
storage, int64_t zxid, int64_t /* session_id */) const override { auto & container = storage.container; @@ -624,7 +644,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest } }; -struct KeeperStorageListRequest final : public KeeperStorageRequest +struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -641,7 +661,7 @@ struct KeeperStorageListRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Read, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override { auto & container = storage.container; @@ -669,7 +689,7 @@ struct KeeperStorageListRequest final : public KeeperStorageRequest } }; -struct KeeperStorageCheckRequest final : public KeeperStorageRequest +struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -686,7 +706,7 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Read, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override { auto & container = storage.container; @@ -713,7 +733,7 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest }; -struct KeeperStorageSetACLRequest final : public KeeperStorageRequest +struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -730,7 +750,7 @@ struct KeeperStorageSetACLRequest final : public KeeperStorageRequest return checkACL(Coordination::ACL::Admin, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override { @@ -777,7 +797,7 @@ struct KeeperStorageSetACLRequest final : public KeeperStorageRequest } }; -struct KeeperStorageGetACLRequest final : public KeeperStorageRequest +struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -794,7 +814,7 @@ struct KeeperStorageGetACLRequest final : public KeeperStorageRequest /// LOL, GetACL requires more permissions than SetACL...
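The return statement just below reduces the whole authorization question to a bitmask test against the node's ACL list. A minimal sketch of the idea, with illustrative names and permissive any-matching-bit semantics (ClickHouse's actual checkACL may differ in details):

```cpp
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

/// Illustrative ZooKeeper-style permission bits (values assumed, not copied from ClickHouse).
enum Permission : int32_t { Read = 1, Write = 2, Create = 4, Delete = 8, Admin = 16 };

struct ACLEntry
{
    int32_t permissions;   /// OR-ed Permission bits granted to this identity
    std::string scheme;    /// e.g. "digest" or "world"
    std::string id;
};

/// Hypothetical checkACL: a request passes if some ACL entry matching one of the
/// session's identities grants at least one of the requested bits. GetACL would ask
/// for Admin | Read, SetACL only for Admin.
static bool checkACLSketch(
    int32_t requested,
    const std::vector<ACLEntry> & node_acls,
    const std::vector<std::pair<std::string, std::string>> & session_auths)
{
    if (node_acls.empty())
        return true;   /// no ACLs on the node -- everything is allowed

    for (const auto & acl : node_acls)
    {
        if (!(acl.permissions & requested))
            continue;
        if (acl.scheme == "world" && acl.id == "anyone")
            return true;
        for (const auto & [scheme, id] : session_auths)
            if (acl.scheme == scheme && acl.id == id)
                return true;
    }
    return false;
}
```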
return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths); } - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override { @@ -817,7 +837,7 @@ struct KeeperStorageGetACLRequest final : public KeeperStorageRequest } }; -struct KeeperStorageMultiRequest final : public KeeperStorageRequest +struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { @@ -827,9 +847,9 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest return true; } - std::vector concrete_requests; - explicit KeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_) - : KeeperStorageRequest(zk_request_) + std::vector concrete_requests; + explicit KeeperStorageMultiRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*zk_request); concrete_requests.reserve(request.requests.size()); @@ -839,19 +859,19 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest auto sub_zk_request = std::dynamic_pointer_cast(sub_request); if (sub_zk_request->getOpNum() == Coordination::OpNum::Create) { - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared(sub_zk_request)); } else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove) { - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared(sub_zk_request)); } else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set) { - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared(sub_zk_request)); } else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check) { - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared(sub_zk_request)); } else throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); @@ -923,18 +943,18 @@ struct KeeperStorageMultiRequest final : public KeeperStorageRequest } }; -struct KeeperStorageCloseRequest final : public KeeperStorageRequest +struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage &, int64_t, int64_t) const override { throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR); } }; -struct KeeperStorageAuthRequest final : public KeeperStorageRequest +struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequest::KeeperStorageRequest; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override { Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); @@ -988,20 +1008,20 @@ void KeeperStorage::finalize() } -class KeeperWrapperFactory final : private boost::noncopyable +class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { public: - using Creator = std::function; 
+ using Creator = std::function; using OpNumToRequest = std::unordered_map; - static KeeperWrapperFactory & instance() + static KeeperStorageRequestProcessorsFactory & instance() { - static KeeperWrapperFactory factory; + static KeeperStorageRequestProcessorsFactory factory; return factory; } - KeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const + KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const { auto it = op_num_to_request.find(zk_request->getOpNum()); if (it == op_num_to_request.end()) @@ -1018,33 +1038,33 @@ public: private: OpNumToRequest op_num_to_request; - KeeperWrapperFactory(); + KeeperStorageRequestProcessorsFactory(); }; template -void registerKeeperRequestWrapper(KeeperWrapperFactory & factory) +void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) { factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } -KeeperWrapperFactory::KeeperWrapperFactory() +KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() { - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); - registerKeeperRequestWrapper(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); } @@ -1059,7 +1079,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina } session_expiry_queue.update(session_id, session_and_timeout[session_id]); - if (zk_request->getOpNum() == Coordination::OpNum::Close) + + if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { auto it = ephemerals.find(session_id); if (it != ephemerals.end()) @@ -1092,21 +1113,21 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina session_and_timeout.erase(session_id); results.push_back(ResponseForSession{session_id, response}); } - else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) + else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { - KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request); + KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); auto [response, _] = storage_request->process(*this, zxid, session_id); response->xid = zk_request->xid; response->zxid = getZXID(); results.push_back(ResponseForSession{session_id, response}); } - else + 
else /// normal request processing { - KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request); + KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; - if (check_acl && !storage_request->checkAuth(*this, session_id)) + if (check_acl && !request_processor->checkAuth(*this, session_id)) { response = zk_request->makeResponse(); /// Original ZooKeeper always throws no auth, even when user provided some credentials @@ -1114,9 +1135,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina } else { - std::tie(response, std::ignore) = storage_request->process(*this, zxid, session_id); + std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id); } + /// Watches for these requests are added to the watches lists if (zk_request->has_watch) { if (response->error == Coordination::Error::ZOK) @@ -1135,9 +1157,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina } } + /// If this request was processed successfully, we need to check watches if (response->error == Coordination::Error::ZOK) { - auto watch_responses = storage_request->processWatches(watches, list_watches); + auto watch_responses = request_processor->processWatches(watches, list_watches); results.insert(results.end(), watch_responses.begin(), watch_responses.end()); } @@ -1153,11 +1176,13 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina void KeeperStorage::clearDeadWatches(int64_t session_id) { + /// Clear all watches for this session auto watches_it = sessions_and_watchers.find(session_id); if (watches_it != sessions_and_watchers.end()) { for (const auto & watch_path : watches_it->second) { + /// Maybe it's a normal watch auto watch = watches.find(watch_path); if (watch != watches.end()) { @@ -1173,6 +1198,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id) watches.erase(watch); } + /// Maybe it's a list watch auto list_watch = list_watches.find(watch_path); if (list_watch != list_watches.end()) { @@ -1188,6 +1214,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id) list_watches.erase(list_watch); } } + sessions_and_watchers.erase(watches_it); } } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index e3cb0f59fdc..1e925a0634e 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -15,14 +15,17 @@ namespace DB { using namespace DB; -struct KeeperStorageRequest; -using KeeperStorageRequestPtr = std::shared_ptr; +struct KeeperStorageRequestProcessor; +using KeeperStorageRequestProcessorPtr = std::shared_ptr; using ResponseCallback = std::function; using ChildrenSet = std::unordered_set; using SessionAndTimeout = std::unordered_map; struct KeeperStorageSnapshot; +/// Keeper state machine, almost identical to ZooKeeper's state machine. +/// Implements all logic of operations, data changes, sessions allocation. +/// In-memory and not thread safe. class KeeperStorage { public: @@ -77,21 +80,34 @@ public: using Watches = std::map; + /// Main hashtable with nodes. Contains all information about data. + /// All other structures except session_and_timeout can be restored from + /// container.
Container container; + + /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; + /// Mapping session_id -> set of watched nodes paths SessionAndWatcher sessions_and_watchers; + /// Expiration queue for sessions; allows getting dead sessions at some point in time SessionExpiryQueue session_expiry_queue; + /// All active sessions with timeout SessionAndTimeout session_and_timeout; + + /// ACLMap for more compact storage of ACLs inside nodes. ACLMap acl_map; + /// Global id of all requests applied to storage int64_t zxid{0}; bool finalized{false}; + /// Currently active watches (node_path -> subscribed sessions) Watches watches; Watches list_watches; /// Watches for 'list' request (watches on children). void clearDeadWatches(int64_t session_id); + /// Get current zxid int64_t getZXID() const { return zxid; @@ -102,6 +118,7 @@ public: public: KeeperStorage(int64_t tick_time_ms, const String & superdigest_); + /// Allocate new session id with the specified timeout int64_t getSessionID(int64_t session_timeout_ms) { auto result = session_id_counter++; @@ -110,21 +127,28 @@ public: return result; } + /// Add session id. Used when restoring KeeperStorage from snapshot. void addSessionID(int64_t session_id, int64_t session_timeout_ms) { session_and_timeout.emplace(session_id, session_timeout_ms); session_expiry_queue.update(session_id, session_timeout_ms); } + /// Process user request and return response. + /// check_acl = false only when converting data from ZooKeeper. ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional new_last_zxid, bool check_acl = true); void finalize(); + /// Set of methods for creating snapshots + + /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. void enableSnapshotMode() { container.enableSnapshotMode(); } + /// Turn off snapshot mode. void disableSnapshotMode() { container.disableSnapshotMode(); @@ -135,16 +159,19 @@ public: return container.begin(); } + /// Clear outdated data from internal container.
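The snapshot-mode switches above (and clearGarbageAfterSnapshot() just below) imply a copy-on-write container: while a snapshot is serialized, superseded node versions must stay alive for the snapshot iterator and be dropped afterwards. A minimal sketch of that idea over a simple list-backed map (the real SnapshotableHashTable is more elaborate; all names here are illustrative):

```cpp
#include <iterator>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

/// Minimal snapshot-mode map: in snapshot mode an update keeps the old version in the
/// list (marked inactive) so that an in-flight snapshot iterator remains valid.
template <typename V>
class SnapshotableMapSketch
{
    struct Node { std::string key; V value; bool active = true; };
    std::list<Node> items;
    std::unordered_map<std::string, typename std::list<Node>::iterator> index;
    bool snapshot_mode = false;

public:
    void enableSnapshotMode() { snapshot_mode = true; }
    void disableSnapshotMode() { snapshot_mode = false; }

    void insertOrUpdate(const std::string & key, V value)
    {
        auto it = index.find(key);
        if (it == index.end())
        {
            items.push_back({key, std::move(value), true});
            index[key] = std::prev(items.end());
        }
        else if (snapshot_mode)
        {
            it->second->active = false;   /// keep the old version for the snapshot
            items.push_back({key, std::move(value), true});
            it->second = std::prev(items.end());
        }
        else
            it->second->value = std::move(value);
    }

    /// clearOutdatedNodes() analogue: drop versions superseded while the snapshot was taken.
    void clearOutdatedNodes()
    {
        for (auto it = items.begin(); it != items.end();)
            it = it->active ? std::next(it) : items.erase(it);
    }
};
```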
void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); } + /// Get all active sessions const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; } + /// Get all dead sessions std::unordered_set getDeadSessions() { return session_expiry_queue.getExpiredSessions(); diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp index 9a744d2bbed..47eadbf9720 100644 --- a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_for_build.cpp @@ -1299,6 +1299,82 @@ TEST(CoordinationTest, TestEphemeralNodeRemove) } +TEST(CoordinationTest, TestRotateIntervalChanges) +{ + using namespace Coordination; + ChangelogDirTest snapshots("./logs"); + { + DB::KeeperLogStore changelog("./logs", 100, true); + + changelog.init(0, 3); + for (size_t i = 1; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog.append(entry); + changelog.end_of_append_batch(0, 0); + } + } + + EXPECT_TRUE(fs::exists("./logs/changelog_0_99.bin")); + + DB::KeeperLogStore changelog_1("./logs", 10, true); + changelog_1.init(0, 50); + for (size_t i = 0; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(100 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_1.append(entry); + changelog_1.end_of_append_batch(0, 0); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_0_99.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_100_109.bin")); + + DB::KeeperLogStore changelog_2("./logs", 7, true); + changelog_2.init(98, 55); + + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(200 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_2.append(entry); + changelog_2.end_of_append_batch(0, 0); + } + + changelog_2.compact(105); + EXPECT_FALSE(fs::exists("./logs/changelog_0_99.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_100_109.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_110_116.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_117_123.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_124_130.bin")); + + DB::KeeperLogStore changelog_3("./logs", 5, true); + changelog_3.init(116, 3); + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(300 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_3.append(entry); + changelog_3.end_of_append_batch(0, 0); + } + + changelog_3.compact(125); + EXPECT_FALSE(fs::exists("./logs/changelog_100_109.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_110_116.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_117_123.bin")); + + EXPECT_TRUE(fs::exists("./logs/changelog_124_130.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_131_135.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_136_140.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_141_145.bin")); +} + + int main(int argc, char ** argv) { Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2eca7c038ae..3d52f0d3bd5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -97,7 +97,7 @@ class IColumn; M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings 
`optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \ \ M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \ - M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \ + M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ \ M(LoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.", 0) \ M(UInt64, load_balancing_first_offset, 0, "Which replica to preferably send a query when FIRST_OR_RANDOM load balancing strategy is used.", 0) \ @@ -125,7 +125,7 @@ class IColumn; M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \ M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \ M(UInt64, distributed_push_down_limit, 1, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \ - M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \ + M(Bool, optimize_distributed_group_by_sharding_key, true, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \ M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \ M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards)", 0) \ @@ -482,6 +482,8 @@ class IColumn; M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. 
If equal to 0, this setting is disabled", 0) \ + M(UInt64, external_storage_connect_timeout, 100, "Connect timeout for external database (Now supported for MySQL)", 0) \ + M(UInt64, external_storage_rw_timeout, 1800, "Read / write timeout for external database (Now supported for MySQL)", 0) \ M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \ M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ @@ -512,6 +514,7 @@ class IColumn; M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \ M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \ M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \ + M(UInt64, replication_alter_columns_timeout, 60, "Obsolete setting, does nothing.", 0) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 21e874691c1..3b207110a67 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -526,7 +526,18 @@ void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context) { + /// Set the timer before cancelling: if the remote server never sends another packet, + /// the reading fiber would otherwise block with no receive timeout armed (the timer is + /// normally set only while an async read is in progress, i.e. while some timeout > 0). + /// + /// Also note that it is possible to get this situation even when + /// enough data has already been read. + (*read_context)->setTimer(); (*read_context)->cancel(); + } connections->sendCancel(); diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/DataStreams/RemoteQueryExecutorReadContext.cpp index c1f415bb597..6bdf52d2831 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp +++ b/src/DataStreams/RemoteQueryExecutorReadContext.cpp @@ -100,7 +100,7 @@ void RemoteQueryExecutorReadContext::setConnectionFD(int fd, Poco::Timespan time connection_fd = fd; epoll.add(connection_fd); - receive_timeout = timeout; + receive_timeout_usec = timeout.totalMicroseconds(); connection_fd_description = fd_description; } @@ -157,8 +157,8 @@ void RemoteQueryExecutorReadContext::setTimer() const /// Did not get packet yet. Init timeout for the next async reading. timer.reset(); - if (receive_timeout.totalMicroseconds()) - timer.setRelative(receive_timeout); + if (receive_timeout_usec) + timer.setRelative(receive_timeout_usec); } bool RemoteQueryExecutorReadContext::resumeRoutine() diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/DataStreams/RemoteQueryExecutorReadContext.h index 5c56bb73dd6..91e34dbb82c 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.h +++ b/src/DataStreams/RemoteQueryExecutorReadContext.h @@ -34,7 +34,8 @@ public: /// This mutex for fiber is needed because fiber could be destroyed in cancel method from another thread. std::mutex fiber_lock; - Poco::Timespan receive_timeout; + /// atomic is required due to data-race between setConnectionFD() and setTimer() from the cancellation path.
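The member declared just below carries the fix: a single 64-bit integer can be published atomically, while a Poco::Timespan cannot. A minimal sketch of the one-writer/concurrent-reader pattern this enables (names and timeout value are illustrative):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

struct ReadContextSketch
{
    /// A plain Poco::Timespan member would be a data race here; a single
    /// 64-bit integer can be stored and loaded atomically instead.
    std::atomic<uint64_t> receive_timeout_usec{0};

    void setConnectionFD(uint64_t timeout_usec)   /// called from the query thread
    {
        receive_timeout_usec.store(timeout_usec, std::memory_order_relaxed);
    }

    void setTimer() const                         /// may be called from a cancelling thread
    {
        uint64_t usec = receive_timeout_usec.load(std::memory_order_relaxed);
        if (usec)
            std::printf("arming timer for %llu us\n", static_cast<unsigned long long>(usec));
    }
};

int main()
{
    ReadContextSketch ctx;
    std::thread writer([&] { ctx.setConnectionFD(300'000'000); });
    std::thread canceller([&] { ctx.setTimer(); });
    writer.join();
    canceller.join();
}
```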
+ std::atomic receive_timeout_usec = 0; IConnections & connections; Poco::Net::Socket * last_used_socket = nullptr; @@ -75,6 +76,7 @@ class RemoteQueryExecutorReadContext { public: void cancel() {} + void setTimer() {} }; } diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 8b31da6d2f1..05d4ba0a395 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -76,17 +76,17 @@ TTLBlockInputStream::TTLBlockInputStream( algorithms.emplace_back(std::make_unique( description, old_ttl_infos.columns_ttl[name], current_time_, - force_, name, default_expression, default_column_name)); + force_, name, default_expression, default_column_name, isCompactPart(data_part))); } } for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs()) - algorithms.emplace_back(std::make_unique( - move_ttl, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); + algorithms.emplace_back(std::make_unique( + move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs()) - algorithms.emplace_back(std::make_unique( - recompression_ttl, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); + algorithms.emplace_back(std::make_unique( + recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); } Block reorderColumns(Block block, const Block & header) diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/DataStreams/TTLCalcInputStream.cpp new file mode 100644 index 00000000000..2353e9ec259 --- /dev/null +++ b/src/DataStreams/TTLCalcInputStream.cpp @@ -0,0 +1,77 @@ +#include +#include + +namespace DB +{ + +TTLCalcInputStream::TTLCalcInputStream( + const BlockInputStreamPtr & input_, + const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const MergeTreeData::MutableDataPartPtr & data_part_, + time_t current_time_, + bool force_) + : data_part(data_part_) + , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcInputStream)")) +{ + children.push_back(input_); + header = children.at(0)->getHeader(); + auto old_ttl_infos = data_part->ttl_infos; + + if (metadata_snapshot_->hasRowsTTL()) + { + const auto & rows_ttl = metadata_snapshot_->getRowsTTL(); + algorithms.emplace_back(std::make_unique( + rows_ttl, TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_)); + } + + for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs()) + algorithms.emplace_back(std::make_unique( + where_ttl, TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_)); + + for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs()) + algorithms.emplace_back(std::make_unique( + group_by_ttl, TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_)); + + if (metadata_snapshot_->hasAnyColumnTTL()) + { + for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs()) + { + algorithms.emplace_back(std::make_unique( + description, TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_)); + } + } + + for (const auto & move_ttl : 
metadata_snapshot_->getMoveTTLs()) + algorithms.emplace_back(std::make_unique( + move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_)); + + for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs()) + algorithms.emplace_back(std::make_unique( + recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_)); +} + +Block TTLCalcInputStream::readImpl() +{ + auto block = children.at(0)->read(); + for (const auto & algorithm : algorithms) + algorithm->execute(block); + + if (!block) + return block; + + Block res; + for (const auto & col : header) + res.insert(block.getByName(col.name)); + + return res; +} + +void TTLCalcInputStream::readSuffixImpl() +{ + data_part->ttl_infos = {}; + for (const auto & algorithm : algorithms) + algorithm->finalize(data_part); +} + +} diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/DataStreams/TTLCalcInputStream.h new file mode 100644 index 00000000000..d1b629c2ad5 --- /dev/null +++ b/src/DataStreams/TTLCalcInputStream.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +class TTLCalcInputStream : public IBlockInputStream +{ +public: + TTLCalcInputStream( + const BlockInputStreamPtr & input_, + const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const MergeTreeData::MutableDataPartPtr & data_part_, + time_t current_time, + bool force_ + ); + + String getName() const override { return "TTL_CALC"; } + Block getHeader() const override { return header; } + +protected: + Block readImpl() override; + + /// Finalizes ttl infos and updates data part + void readSuffixImpl() override; + +private: + std::vector algorithms; + + /// ttl_infos and empty_columns are updated while reading + const MergeTreeData::MutableDataPartPtr & data_part; + Poco::Logger * log; + Block header; +}; + +} diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp index 1318ea382db..71ad2a4e38f 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/DataStreams/TTLColumnAlgorithm.cpp @@ -10,11 +10,13 @@ TTLColumnAlgorithm::TTLColumnAlgorithm( bool force_, const String & column_name_, const ExpressionActionsPtr & default_expression_, - const String & default_column_name_) + const String & default_column_name_, + bool is_compact_part_) : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) , column_name(column_name_) , default_expression(default_expression_) , default_column_name(default_column_name_) + , is_compact_part(is_compact_part_) { if (!isMinTTLExpired()) { @@ -40,7 +42,7 @@ void TTLColumnAlgorithm::execute(Block & block) return; /// Later drop full column - if (isMaxTTLExpired()) + if (isMaxTTLExpired() && !is_compact_part) return; auto default_column = executeExpressionAndGetColumn(default_expression, block, default_column_name); diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h index e09dd663af0..ddf963eaee2 100644 --- a/src/DataStreams/TTLColumnAlgorithm.h +++ b/src/DataStreams/TTLColumnAlgorithm.h @@ -17,7 +17,9 @@ public: bool force_, const String & column_name_, const ExpressionActionsPtr & default_expression_, - const String & default_column_name_); + const String & default_column_name_, + bool is_compact_part_ + ); void execute(Block & block) override; void 
finalize(const MutableDataPartPtr & data_part) const override; @@ -28,6 +30,7 @@ private: const String default_column_name; bool is_fully_empty = true; + bool is_compact_part; }; } diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp index d5feb14658b..6a983d052c1 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp +++ b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp @@ -4,8 +4,15 @@ namespace DB { TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) + const TTLDescription & description_, + const TTLUpdateField ttl_update_field_, + const String ttl_update_key_, + const TTLInfo & old_ttl_info_, + time_t current_time_, + bool force_) : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_) + , ttl_update_field(ttl_update_field_) + , ttl_update_key(ttl_update_key_) { } @@ -22,26 +29,37 @@ void TTLUpdateInfoAlgorithm::execute(Block & block) } } -TTLMoveAlgorithm::TTLMoveAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) - : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_) +void TTLUpdateInfoAlgorithm::finalize(const MutableDataPartPtr & data_part) const { -} + if (ttl_update_field == TTLUpdateField::RECOMPRESSION_TTL) + { + data_part->ttl_infos.recompression_ttl[ttl_update_key] = new_ttl_info; + } + else if (ttl_update_field == TTLUpdateField::MOVES_TTL) + { + data_part->ttl_infos.moves_ttl[ttl_update_key] = new_ttl_info; + } + else if (ttl_update_field == TTLUpdateField::GROUP_BY_TTL) + { + data_part->ttl_infos.group_by_ttl[ttl_update_key] = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); + } + else if (ttl_update_field == TTLUpdateField::ROWS_WHERE_TTL) + { + data_part->ttl_infos.rows_where_ttl[ttl_update_key] = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); + } + else if (ttl_update_field == TTLUpdateField::TABLE_TTL) + { + data_part->ttl_infos.table_ttl = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); + } + else if (ttl_update_field == TTLUpdateField::COLUMNS_TTL) + { + data_part->ttl_infos.columns_ttl[ttl_update_key] = new_ttl_info; + data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max); + } -void TTLMoveAlgorithm::finalize(const MutableDataPartPtr & data_part) const -{ - data_part->ttl_infos.moves_ttl[description.result_column] = new_ttl_info; -} - -TTLRecompressionAlgorithm::TTLRecompressionAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) - : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_) -{ -} - -void TTLRecompressionAlgorithm::finalize(const MutableDataPartPtr & data_part) const -{ - data_part->ttl_infos.recompression_ttl[description.result_column] = new_ttl_info; } } diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/DataStreams/TTLUpdateInfoAlgorithm.h index c1ef0e1c90d..551211fc47f 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.h +++ b/src/DataStreams/TTLUpdateInfoAlgorithm.h @@ -5,28 +5,35 @@ namespace DB { +enum class TTLUpdateField +{ + COLUMNS_TTL, + TABLE_TTL, + ROWS_WHERE_TTL, + MOVES_TTL, + RECOMPRESSION_TTL, + GROUP_BY_TTL, +}; + /// Calculates new ttl_info and does nothing with data. 
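Right before the class itself, it is worth sketching the unification this diff performs: a single TTLUpdateInfoAlgorithm whose finalize() dispatches on an enum tag, instead of one subclass per TTL kind (the removed TTLMoveAlgorithm / TTLRecompressionAlgorithm). A hedged, self-contained mirror of the idea, with all names hypothetical:

```cpp
#include <map>
#include <string>

/// Hypothetical mirror of the unified finalize(): one algorithm type writes its
/// result into the right ttl_infos slot depending on an enum tag.
enum class UpdateField { MovesTTL, RecompressionTTL };

struct TTLInfoSketch { long min = 0; long max = 0; };

struct PartTTLInfosSketch
{
    std::map<std::string, TTLInfoSketch> moves_ttl;
    std::map<std::string, TTLInfoSketch> recompression_ttl;
};

static void finalizeSketch(PartTTLInfosSketch & infos, UpdateField field,
                           const std::string & key, const TTLInfoSketch & new_info)
{
    switch (field)
    {
        case UpdateField::MovesTTL:         infos.moves_ttl[key] = new_info; break;
        case UpdateField::RecompressionTTL: infos.recompression_ttl[key] = new_info; break;
    }
}
```

The design choice is the usual subclass-versus-tag trade-off: a tag plus one switch keeps all the slot-selection logic in one place, at the cost of extending the enum whenever a new TTL kind appears.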
class TTLUpdateInfoAlgorithm : public ITTLAlgorithm { public: - TTLUpdateInfoAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); + TTLUpdateInfoAlgorithm( + const TTLDescription & description_, + const TTLUpdateField ttl_update_field_, + const String ttl_update_key_, + const TTLInfo & old_ttl_info_, + time_t current_time_, bool force_ + ); void execute(Block & block) override; - void finalize(const MutableDataPartPtr & data_part) const override = 0; + void finalize(const MutableDataPartPtr & data_part) const override; + +private: + const TTLUpdateField ttl_update_field; + const String ttl_update_key; }; -class TTLMoveAlgorithm final : public TTLUpdateInfoAlgorithm -{ -public: - TTLMoveAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); - void finalize(const MutableDataPartPtr & data_part) const override; -}; - -class TTLRecompressionAlgorithm final : public TTLUpdateInfoAlgorithm -{ -public: - TTLRecompressionAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_); - void finalize(const MutableDataPartPtr & data_part) const override; -}; } diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h index 57657d1d110..92c72b87afa 100644 --- a/src/DataTypes/DataTypeEnum.h +++ b/src/DataTypes/DataTypeEnum.h @@ -27,6 +27,8 @@ public: bool isCategorial() const override { return true; } bool canBeInsideNullable() const override { return true; } bool isComparable() const override { return true; } + + virtual bool contains(const IDataType & rhs) const = 0; }; @@ -76,7 +78,7 @@ public: /// Example: /// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK /// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK - bool contains(const IDataType & rhs) const; + bool contains(const IDataType & rhs) const override; SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index ab2e8e1958b..7077c5bfa14 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 75a3b9c9e1e..01bc8cb34c7 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -155,7 +155,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String /// Split into replicas if needed. 
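To make the comment above concrete: the host description is first split into replica addresses, and only then is the pool built with the new connect/rw timeouts. A hedged sketch of such splitting, assuming comma-separated host[:port] items (the real parseRemoteDescriptionForExternalDatabase additionally expands {...} globs and enforces max_addresses):

```cpp
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

/// Example: splitAddresses("db1:3306,db2", 3306) -> {("db1", 3306), ("db2", 3306)}
static std::vector<std::pair<std::string, uint16_t>>
splitAddresses(const std::string & description, uint16_t default_port)
{
    std::vector<std::pair<std::string, uint16_t>> result;
    size_t pos = 0;
    while (pos < description.size())
    {
        size_t comma = description.find(',', pos);
        if (comma == std::string::npos)
            comma = description.size();
        std::string item = description.substr(pos, comma - pos);
        pos = comma + 1;
        if (item.empty())
            continue;

        size_t colon = item.rfind(':');
        if (colon == std::string::npos)
            result.emplace_back(item, default_port);
        else
            result.emplace_back(item.substr(0, colon),
                                static_cast<uint16_t>(std::stoi(item.substr(colon + 1))));
    }
    return result;
}
```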
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements; auto addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306); - auto mysql_pool = mysqlxx::PoolWithFailover(mysql_database_name, addresses, mysql_user_name, mysql_user_password); + mysqlxx::PoolWithFailover mysql_pool(mysql_database_name, addresses, + mysql_user_name, mysql_user_password, + MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + context->getSettingsRef().external_storage_connect_timeout, + context->getSettingsRef().external_storage_rw_timeout); mysql_database_settings->loadFromQueryContext(context); mysql_database_settings->loadFromQuery(*engine_define); /// higher priority @@ -168,7 +174,6 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String MySQLClient client(remote_host_name, remote_port, mysql_user_name, mysql_user_password); auto mysql_pool = mysqlxx::Pool(mysql_database_name, remote_host_name, mysql_user_name, mysql_user_password, remote_port); - auto materialize_mode_settings = std::make_unique(); if (engine_define->settings) diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index b1437d58c09..64e5dce413d 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -26,8 +26,8 @@ const String & getFunctionCanonicalNameIfAny(const String & name) return FunctionFactory::instance().getCanonicalNameIfAny(name); } -void FunctionFactory::registerFunction(const - std::string & name, +void FunctionFactory::registerFunction( + const std::string & name, Value creator, CaseSensitiveness case_sensitiveness) { @@ -119,8 +119,8 @@ FunctionOverloadResolverPtr FunctionFactory::tryGetImpl( } FunctionOverloadResolverPtr FunctionFactory::tryGet( - const std::string & name, - ContextPtr context) const + const std::string & name, + ContextPtr context) const { auto impl = tryGetImpl(name, context); return impl ? std::move(impl) : nullptr; } diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 79f627fb64c..86b590646dc 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -325,7 +325,7 @@ struct StringSource }; -/// Differs to StringSource by having 'offest' and 'length' in code points instead of bytes in getSlice* methods. +/// Differs from StringSource by having 'offset' and 'length' in code points instead of bytes in getSlice* methods.
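The corrected comment points at a real subtlety: for the UTF-8 source, 'offset' and 'length' count code points, so slicing must first walk the byte sequence. A small sketch of advancing by code points, assuming valid UTF-8 (continuation bytes match 10xxxxxx):

```cpp
#include <cstddef>
#include <string>

/// Advance `count` UTF-8 code points from byte position `pos` (valid UTF-8 assumed):
/// a byte starts a new code point unless it looks like 10xxxxxx.
static size_t advanceCodePoints(const std::string & s, size_t pos, size_t count)
{
    while (count > 0 && pos < s.size())
    {
        ++pos;
        while (pos < s.size() && (static_cast<unsigned char>(s[pos]) & 0xC0) == 0x80)
            ++pos;
        --count;
    }
    return pos;
}

/// Example: in "héllo" the 3rd code point ('l') starts at byte 3, not byte 2,
/// because 'é' occupies two bytes -- exactly why byte offsets and code-point
/// offsets must not be mixed.
```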
/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size: * substring: * hello diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 59594a78401..a4cdc601d84 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -95,32 +96,30 @@ private: using Offsets = ColumnArray::Offsets; - static bool matchKeyToIndex(const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); + static bool matchKeyToIndexNumber( + const IColumn & data, const Offsets & offsets, bool is_key_const, + const IColumn & index, PaddedPODArray & matched_idxs); - static bool matchKeyToIndexConst(const IColumn & data, const Offsets & offsets, + static bool matchKeyToIndexNumberConst( + const IColumn & data, const Offsets & offsets, const Field & index, PaddedPODArray & matched_idxs); - template - static bool matchKeyToIndexNumber(const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); + static bool matchKeyToIndexString( + const IColumn & data, const Offsets & offsets, bool is_key_const, + const IColumn & index, PaddedPODArray & matched_idxs); - template - static bool matchKeyToIndexNumberConst(const IColumn & data, const Offsets & offsets, - const Field & index, PaddedPODArray & matched_idxs); - - static bool matchKeyToIndexString(const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); - - static bool matchKeyToIndexFixedString(const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); - - static bool matchKeyToIndexStringConst(const IColumn & data, const Offsets & offsets, + static bool matchKeyToIndexStringConst( + const IColumn & data, const Offsets & offsets, const Field & index, PaddedPODArray & matched_idxs); template static void executeMatchKeyToIndex(const Offsets & offsets, PaddedPODArray & matched_idxs, const Matcher & matcher); + + template + static void executeMatchConstKeyToIndex( + size_t num_rows, size_t num_values, + PaddedPODArray & matched_idxs, const Matcher & matcher); }; @@ -759,23 +758,11 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu namespace { +template struct MatcherString { - const ColumnString & data; - const ColumnString & index; - - bool match(size_t row_data, size_t row_index) const - { - auto data_ref = data.getDataAt(row_data); - auto index_ref = index.getDataAt(row_index); - return memequalSmallAllowOverflow15(index_ref.data, index_ref.size, data_ref.data, data_ref.size); - } -}; - -struct MatcherFixedString -{ - const ColumnFixedString & data; - const ColumnFixedString & index; + const DataColumn & data; + const IndexColumn & index; bool match(size_t row_data, size_t row_index) const { @@ -785,9 +772,10 @@ struct MatcherFixedString } }; +template struct MatcherStringConst { - const ColumnString & data; + const DataColumn & data; const String & index; bool match(size_t row_data, size_t /* row_index */) const @@ -797,23 +785,23 @@ struct MatcherStringConst } }; -template +template struct MatcherNumber { - const PaddedPODArray & data; - const PaddedPODArray & index; + const PaddedPODArray & data; + const PaddedPODArray & index; bool match(size_t row_data, size_t row_index) const { - 
return data[row_data] == index[row_index]; + return data[row_data] == static_cast(index[row_index]); } }; -template +template struct MatcherNumberConst { - const PaddedPODArray & data; - T index; + const PaddedPODArray & data; + DataType index; bool match(size_t row_data, size_t /* row_index */) const { @@ -848,147 +836,158 @@ void FunctionArrayElement::executeMatchKeyToIndex( } } +template +void FunctionArrayElement::executeMatchConstKeyToIndex( + size_t num_rows, size_t num_values, + PaddedPODArray & matched_idxs, const Matcher & matcher) +{ + for (size_t i = 0; i < num_rows; ++i) + { + bool matched = false; + for (size_t j = 0; j < num_values; ++j) + { + if (matcher.match(j, i)) + { + matched_idxs.push_back(j + 1); + matched = true; + break; + } + } + + if (!matched) + matched_idxs.push_back(0); + } +} + +template +static bool castColumnString(const IColumn * column, F && f) +{ + return castTypeToEither(column, std::forward(f)); +} + bool FunctionArrayElement::matchKeyToIndexStringConst( const IColumn & data, const Offsets & offsets, const Field & index, PaddedPODArray & matched_idxs) { - const auto * data_string = checkAndGetColumn(&data); - if (!data_string) - return false; + return castColumnString(&data, [&](const auto & data_column) + { + using DataColumn = std::decay_t; - if (index.getType() != Field::Types::String) - return false; - - MatcherStringConst matcher{*data_string, get(index)}; - executeMatchKeyToIndex(offsets, matched_idxs, matcher); - return true; + MatcherStringConst matcher{data_column, get(index)}; + executeMatchKeyToIndex(offsets, matched_idxs, matcher); + return true; + }); } bool FunctionArrayElement::matchKeyToIndexString( - const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs) + const IColumn & data, const Offsets & offsets, bool is_key_const, + const IColumn & index, PaddedPODArray & matched_idxs) { - const auto * index_string = checkAndGetColumn(arguments[1].column.get()); - if (!index_string) - return false; + return castColumnString(&data, [&](const auto & data_column) + { + return castColumnString(&index, [&](const auto & index_column) + { + using DataColumn = std::decay_t; + using IndexColumn = std::decay_t; - const auto * data_string = checkAndGetColumn(&data); - if (!data_string) - return false; + MatcherString matcher{data_column, index_column}; + if (is_key_const) + executeMatchConstKeyToIndex(index.size(), data.size(), matched_idxs, matcher); + else + executeMatchKeyToIndex(offsets, matched_idxs, matcher); - MatcherString matcher{*data_string, *index_string}; - executeMatchKeyToIndex(offsets, matched_idxs, matcher); - return true; + return true; + }); + }); } -bool FunctionArrayElement::matchKeyToIndexFixedString( - const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs) +template +static constexpr bool areConvertibleTypes = + std::is_same_v + || (is_integer_v && is_integer_v + && std::is_convertible_v); + +template +static bool castColumnNumeric(const IColumn * column, F && f) { - const auto * index_string = checkAndGetColumn(arguments[1].column.get()); - if (!index_string) - return false; - - const auto * data_string = checkAndGetColumn(&data); - if (!data_string) - return false; - - MatcherFixedString matcher{*data_string, *index_string}; - executeMatchKeyToIndex(offsets, matched_idxs, matcher); - return true; + return castTypeToEither< + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, 
+ ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector, + ColumnVector + >(column, std::forward(f)); } -template bool FunctionArrayElement::matchKeyToIndexNumberConst( const IColumn & data, const Offsets & offsets, const Field & index, PaddedPODArray & matched_idxs) { - const auto * data_numeric = checkAndGetColumn>(&data); - if (!data_numeric) - return false; - - std::optional index_as_integer; - Field::dispatch([&](const auto & value) + return castColumnNumeric(&data, [&](const auto & data_column) { - using FieldType = std::decay_t; - if constexpr (std::is_same_v || (is_integer_v && std::is_convertible_v)) - index_as_integer = static_cast(value); - }, index); + using DataType = typename std::decay_t::ValueType; + std::optional index_as_integer; - if (!index_as_integer) - return false; + Field::dispatch([&](const auto & value) + { + using FieldType = std::decay_t; + if constexpr (areConvertibleTypes) + index_as_integer = static_cast(value); + }, index); - MatcherNumberConst matcher{data_numeric->getData(), *index_as_integer}; - executeMatchKeyToIndex(offsets, matched_idxs, matcher); - return true; + if (!index_as_integer) + return false; + + MatcherNumberConst matcher{data_column.getData(), *index_as_integer}; + executeMatchKeyToIndex(offsets, matched_idxs, matcher); + return true; + }); } -template bool FunctionArrayElement::matchKeyToIndexNumber( - const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs) + const IColumn & data, const Offsets & offsets, bool is_key_const, + const IColumn & index, PaddedPODArray & matched_idxs) { - const auto * index_numeric = checkAndGetColumn>(arguments[1].column.get()); - if (!index_numeric) - return false; + return castColumnNumeric(&data, [&](const auto & data_column) + { + return castColumnNumeric(&index, [&](const auto & index_column) + { + using DataType = typename std::decay_t::ValueType; + using IndexType = typename std::decay_t::ValueType; - const auto * data_numeric = checkAndGetColumn>(&data); - if (!data_numeric) - return false; + if constexpr (areConvertibleTypes) + { + MatcherNumber matcher{data_column.getData(), index_column.getData()}; + if (is_key_const) + executeMatchConstKeyToIndex(index_column.size(), data_column.size(), matched_idxs, matcher); + else + executeMatchKeyToIndex(offsets, matched_idxs, matcher); - MatcherNumber matcher{data_numeric->getData(), index_numeric->getData()}; - executeMatchKeyToIndex(offsets, matched_idxs, matcher); - return true; -} + return true; + } -bool FunctionArrayElement::matchKeyToIndex( - const IColumn & data, const Offsets & offsets, - const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs) -{ - return matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, 
arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexString(data, offsets, arguments, matched_idxs) - || matchKeyToIndexFixedString(data, offsets, arguments, matched_idxs); -} - -bool FunctionArrayElement::matchKeyToIndexConst( - const IColumn & data, const Offsets & offsets, - const Field & index, PaddedPODArray & matched_idxs) -{ - return matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexNumberConst(data, offsets, index, matched_idxs) - || matchKeyToIndexStringConst(data, offsets, index, matched_idxs); + return false; + }); + }); } ColumnPtr FunctionArrayElement::executeMap( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { - const ColumnMap * col_map = typeid_cast(arguments[0].column.get()); - if (!col_map) - return nullptr; + const auto * col_map = checkAndGetColumn(arguments[0].column.get()); + const auto * col_const_map = checkAndGetColumnConst(arguments[0].column.get()); + assert(col_map || col_const_map); + + if (col_const_map) + col_map = typeid_cast(&col_const_map->getDataColumn()); const auto & nested_column = col_map->getNestedColumn(); const auto & keys_data = col_map->getNestedData().getColumn(0); @@ -1000,29 +999,33 @@ ColumnPtr FunctionArrayElement::executeMap( indices_column->reserve(input_rows_count); auto & indices_data = assert_cast &>(*indices_column).getData(); + bool executed = false; if (!isColumnConst(*arguments[1].column)) { - if (input_rows_count > 0 && !matchKeyToIndex(keys_data, offsets, arguments, indices_data)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal types of arguments: {}, {} for function {}", - arguments[0].type->getName(), arguments[1].type->getName(), getName()); + executed = matchKeyToIndexNumber(keys_data, offsets, !!col_const_map, *arguments[1].column, indices_data) + || matchKeyToIndexString(keys_data, offsets, !!col_const_map, *arguments[1].column, indices_data); } else { Field index = (*arguments[1].column)[0]; - - // Get Matched key's value - if (input_rows_count > 0 && !matchKeyToIndexConst(keys_data, offsets, index, indices_data)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal types of arguments: {}, {} for function {}", - arguments[0].type->getName(), arguments[1].type->getName(), getName()); + executed = matchKeyToIndexNumberConst(keys_data, offsets, index, indices_data) + || matchKeyToIndexStringConst(keys_data, offsets, index, indices_data); } + if (!executed) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal types of arguments: {}, {} for function {}", + arguments[0].type->getName(), arguments[1].type->getName(), getName()); + + 
+    ColumnPtr values_array = ColumnArray::create(values_data.getPtr(), nested_column.getOffsetsPtr());
+    if (col_const_map)
+        values_array = ColumnConst::create(values_array, input_rows_count);
+
     /// Prepare arguments to call arrayElement for array with values and calculated indices at previous step.
     ColumnsWithTypeAndName new_arguments =
     {
         {
-            ColumnArray::create(values_data.getPtr(), nested_column.getOffsetsPtr()),
+            values_array,
             std::make_shared<DataTypeArray>(result_type),
             ""
         },
@@ -1066,13 +1069,14 @@ DataTypePtr FunctionArrayElement::getReturnTypeImpl(const DataTypes & arguments)
 ColumnPtr FunctionArrayElement::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
 {
-    /// Check nullability.
-    bool is_array_of_nullable = false;
+    const auto * col_map = checkAndGetColumn<ColumnMap>(arguments[0].column.get());
+    const auto * col_const_map = checkAndGetColumnConst<ColumnMap>(arguments[0].column.get());
 
-    const ColumnMap * col_map = checkAndGetColumn<ColumnMap>(arguments[0].column.get());
-    if (col_map)
+    if (col_map || col_const_map)
         return executeMap(arguments, result_type, input_rows_count);
 
+    /// Check nullability.
+    bool is_array_of_nullable = false;
     const ColumnArray * col_array = nullptr;
     const ColumnArray * col_const_array = nullptr;
diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp
index bea83c74e21..472399dea3d 100644
--- a/src/IO/ZlibInflatingReadBuffer.cpp
+++ b/src/IO/ZlibInflatingReadBuffer.cpp
@@ -38,7 +38,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
 #pragma GCC diagnostic pop
 
     if (rc != Z_OK)
-        throw Exception(std::string("inflateInit2 failed: ") + zError(rc) + "; zlib version: " + ZLIB_VERSION, ErrorCodes::ZLIB_INFLATE_FAILED);
+        throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION);
 }
 
 ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer()
@@ -48,41 +48,60 @@ ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer()
 
 bool ZlibInflatingReadBuffer::nextImpl()
 {
-    if (eof)
-        return false;
-
-    if (!zstr.avail_in)
+    /// We need a do-while loop here to handle the case when eof is not reached yet,
+    /// but the working buffer stays empty because nothing was decompressed in the current iteration
+    /// (this happens with some compressed frames; the same idea is implemented in ZstdInflatingReadBuffer).
+    do
     {
-        in->nextIfAtEnd();
-        zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
-        zstr.avail_in = in->buffer().end() - in->position();
-    }
-    zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
-    zstr.avail_out = internal_buffer.size();
+        /// If we have already found eof, we shouldn't do anything.
+        if (eof)
+            return false;
 
-    int rc = inflate(&zstr, Z_NO_FLUSH);
-
-    in->position() = in->buffer().end() - zstr.avail_in;
-    working_buffer.resize(internal_buffer.size() - zstr.avail_out);
-
-    if (rc == Z_STREAM_END)
-    {
-        if (in->eof())
+        /// If there are no input bytes available in zstr, point it at the next chunk of input data.
+        if (!zstr.avail_in)
         {
-            eof = true;
-            return !working_buffer.empty();
+            in->nextIfAtEnd();
+            zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
+            zstr.avail_in = in->buffer().end() - in->position();
         }
-        else
-        {
-            rc = inflateReset(&zstr);
-            if (rc != Z_OK)
-                throw Exception(std::string("inflateReset failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED);
-            return true;
-        }
-    }
-    if (rc != Z_OK)
-        throw Exception(std::string("inflate failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED);
+        /// Set up the output bytes (the place where decompressed data will be written).
+        zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
+        zstr.avail_out = internal_buffer.size();
+        int rc = inflate(&zstr, Z_NO_FLUSH);
+
+        /// Move the input stream to the position where reading stopped.
+        in->position() = in->buffer().end() - zstr.avail_in;
+        /// Resize the working buffer: its size equals the internal_buffer size minus the unused output bytes.
+        working_buffer.resize(internal_buffer.size() - zstr.avail_out);
+
+        /// If the end of the stream was reached, it can be end of file or end of one part (for example, a chunk).
+        if (rc == Z_STREAM_END)
+        {
+            /// If it is end of file, remember this and return:
+            /// * true if the working buffer is non-empty (we still have something to read, so next() must return true),
+            /// * false if there is no data in the working buffer.
+            if (in->eof())
+            {
+                eof = true;
+                return !working_buffer.empty();
+            }
+            /// If it is not end of file, we need to reset zstr and return true, because we still have some data to read.
+            else
+            {
+                rc = inflateReset(&zstr);
+                if (rc != Z_OK)
+                    throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
+                return true;
+            }
+        }
+        /// If it is neither stream end nor OK, something went wrong; throw an exception.
+        if (rc != Z_OK)
+            throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc));
+    }
+    while (working_buffer.empty());
+
+    /// If the code reaches this section, the working buffer is not empty, so there is some data to process.
     return true;
 }
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index f59d50dbdeb..d984a350c80 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -14,7 +14,7 @@
 #include
 #include
 #include
-#include <Coordination/KeeperStorageDispatcher.h>
+#include <Coordination/KeeperDispatcher.h>
 #include
 #include
 #include
@@ -146,7 +146,7 @@ struct ContextSharedPart
 #if USE_NURAFT
     mutable std::mutex keeper_storage_dispatcher_mutex;
-    mutable std::shared_ptr<KeeperStorageDispatcher> keeper_storage_dispatcher;
+    mutable std::shared_ptr<KeeperDispatcher> keeper_storage_dispatcher;
 #endif
     mutable std::mutex auxiliary_zookeepers_mutex;
     mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients.
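// Freestanding sketch of the inflate()/inflateReset() loop that the
// ZlibInflatingReadBuffer change above implements inside nextImpl(): keep
// inflating until output is produced, and on Z_STREAM_END with input still
// remaining, reset the stream to start the next concatenated member.
// This is an illustration against the plain zlib API, not ClickHouse code;
// truncated input surfaces as an inflate error here.

#include <zlib.h>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<unsigned char> inflateAll(const std::vector<unsigned char> & input)
{
    z_stream zstr{};
    if (inflateInit2(&zstr, 15 + 32) != Z_OK)   // 15 + 32: auto-detect gzip/zlib header
        throw std::runtime_error("inflateInit2 failed");

    std::vector<unsigned char> out;
    unsigned char buf[4096];
    zstr.next_in = const_cast<unsigned char *>(input.data());
    zstr.avail_in = static_cast<uInt>(input.size());

    int rc = Z_OK;
    do
    {
        zstr.next_out = buf;
        zstr.avail_out = sizeof(buf);
        rc = inflate(&zstr, Z_NO_FLUSH);
        if (rc != Z_OK && rc != Z_STREAM_END)
        {
            inflateEnd(&zstr);
            throw std::runtime_error(std::string("inflate failed: ") + zError(rc));
        }
        out.insert(out.end(), buf, buf + (sizeof(buf) - zstr.avail_out));
        if (rc == Z_STREAM_END && zstr.avail_in > 0)
            rc = inflateReset(&zstr);           // another member follows; keep going
    }
    while (rc == Z_OK);

    inflateEnd(&zstr);
    return out;
}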
@@ -1649,7 +1649,7 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); } -void Context::initializeKeeperStorageDispatcher() const +void Context::initializeKeeperDispatcher() const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); @@ -1660,14 +1660,14 @@ void Context::initializeKeeperStorageDispatcher() const const auto & config = getConfigRef(); if (config.has("keeper_server")) { - shared->keeper_storage_dispatcher = std::make_shared(); + shared->keeper_storage_dispatcher = std::make_shared(); shared->keeper_storage_dispatcher->initialize(config, getApplicationType() == ApplicationType::KEEPER); } #endif } #if USE_NURAFT -std::shared_ptr & Context::getKeeperStorageDispatcher() const +std::shared_ptr & Context::getKeeperDispatcher() const { std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); if (!shared->keeper_storage_dispatcher) @@ -1677,7 +1677,7 @@ std::shared_ptr & Context::getKeeperStorageDispatcher() } #endif -void Context::shutdownKeeperStorageDispatcher() const +void Context::shutdownKeeperDispatcher() const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 9527b87ed39..6af2c3c4d62 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -102,7 +102,7 @@ class StoragePolicySelector; using StoragePolicySelectorPtr = std::shared_ptr; struct PartUUIDs; using PartUUIDsPtr = std::shared_ptr; -class KeeperStorageDispatcher; +class KeeperDispatcher; class Session; class IOutputFormat; @@ -647,10 +647,10 @@ public: std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; #if USE_NURAFT - std::shared_ptr & getKeeperStorageDispatcher() const; + std::shared_ptr & getKeeperDispatcher() const; #endif - void initializeKeeperStorageDispatcher() const; - void shutdownKeeperStorageDispatcher() const; + void initializeKeeperDispatcher() const; + void shutdownKeeperDispatcher() const; /// Set auxiliary zookeepers configuration at server starting or configuration reloading. 
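// Sketch of the guarded lazy-initialization shape used by
// initializeKeeperDispatcher()/getKeeperDispatcher() above. Types and the
// holder class are simplified, hypothetical stand-ins; the real code also
// consults the server configuration before creating the dispatcher.

#include <memory>
#include <mutex>
#include <stdexcept>

struct KeeperDispatcher { void initialize() { /* start worker threads, etc. */ } };

class DispatcherHolder
{
    mutable std::mutex mutex;
    mutable std::shared_ptr<KeeperDispatcher> dispatcher;

public:
    void initializeDispatcher() const
    {
        std::lock_guard lock(mutex);
        if (dispatcher)
            return;                                  // at most one initialization
        dispatcher = std::make_shared<KeeperDispatcher>();
        dispatcher->initialize();
    }

    std::shared_ptr<KeeperDispatcher> & getDispatcher() const
    {
        std::lock_guard lock(mutex);
        if (!dispatcher)
            throw std::logic_error("dispatcher must be initialized before use");
        return dispatcher;
    }
};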
void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config); diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h index 3ba25a327c4..90235de34b0 100644 --- a/src/Interpreters/InDepthNodeVisitor.h +++ b/src/Interpreters/InDepthNodeVisitor.h @@ -16,7 +16,7 @@ class InDepthNodeVisitor public: using Data = typename Matcher::Data; - InDepthNodeVisitor(Data & data_, WriteBuffer * ostr_ = nullptr) + explicit InDepthNodeVisitor(Data & data_, WriteBuffer * ostr_ = nullptr) : data(data_), visit_depth(0), ostr(ostr_) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp new file mode 100644 index 00000000000..01fc60060b3 --- /dev/null +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_IDENTIFIER; + extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; + extern const int UNSUPPORTED_METHOD; +} + +BlockIO InterpreterCreateFunctionQuery::execute() +{ + auto current_context = getContext(); + current_context->checkAccess(AccessType::CREATE_FUNCTION); + + FunctionNameNormalizer().visit(query_ptr.get()); + auto * create_function_query = query_ptr->as(); + + if (!create_function_query) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected CREATE FUNCTION query"); + + auto & function_name = create_function_query->function_name; + validateFunction(create_function_query->function_core, function_name); + + UserDefinedFunctionFactory::instance().registerFunction(function_name, query_ptr); + + if (!is_internal) + { + try + { + UserDefinedObjectsLoader::instance().storeObject(current_context, UserDefinedObjectType::Function, function_name, *query_ptr); + } + catch (Exception & exception) + { + UserDefinedFunctionFactory::instance().unregisterFunction(function_name); + exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); + throw; + } + } + + return {}; +} + +void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const String & name) +{ + const auto * args_tuple = function->as()->arguments->children.at(0)->as(); + std::unordered_set arguments; + for (const auto & argument : args_tuple->arguments->children) + { + const auto & argument_name = argument->as()->name(); + auto [_, inserted] = arguments.insert(argument_name); + if (!inserted) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name); + } + + ASTPtr function_body = function->as()->children.at(0)->children.at(1); + std::unordered_set identifiers_in_body = getIdentifiers(function_body); + + for (const auto & identifier : identifiers_in_body) + { + if (!arguments.contains(identifier)) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier {} does not exist in arguments", backQuote(identifier)); + } + + validateFunctionRecursiveness(function_body, name); +} + +std::unordered_set InterpreterCreateFunctionQuery::getIdentifiers(ASTPtr node) +{ + std::unordered_set identifiers; + + std::stack ast_nodes_to_process; + ast_nodes_to_process.push(node); + + while (!ast_nodes_to_process.empty()) + { + auto ast_node_to_process = ast_nodes_to_process.top(); + ast_nodes_to_process.pop(); + + for (const auto & child : ast_node_to_process->children) + { + auto identifier_name_opt = tryGetIdentifierName(child); + if 
(identifier_name_opt) + identifiers.insert(identifier_name_opt.value()); + + ast_nodes_to_process.push(child); + } + } + + return identifiers; +} + +void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) +{ + for (const auto & child : node->children) + { + auto function_name_opt = tryGetFunctionName(child); + if (function_name_opt && function_name_opt.value() == function_to_create) + throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function"); + + validateFunctionRecursiveness(child, function_to_create); + } +} + +} diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h new file mode 100644 index 00000000000..b10760c5e9d --- /dev/null +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +class InterpreterCreateFunctionQuery : public IInterpreter, WithContext +{ +public: + InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextPtr context_, bool is_internal_) + : WithContext(context_) + , query_ptr(query_ptr_) + , is_internal(is_internal_) {} + + BlockIO execute() override; + + void setInternal(bool internal_); + +private: + static void validateFunction(ASTPtr function, const String & name); + static std::unordered_set getIdentifiers(ASTPtr node); + static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); + + ASTPtr query_ptr; + bool is_internal; +}; + +} diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 76cb6c783ba..a1313a84c36 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -550,7 +550,7 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A } -InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(ASTCreateQuery & create) const +InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const { TableProperties properties; TableLockHolder as_storage_lock; @@ -589,10 +589,13 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS auto as_storage_metadata = as_storage->getInMemoryMetadataPtr(); properties.columns = as_storage_metadata->getColumns(); - /// Secondary indices make sense only for MergeTree family of storage engines. + /// Secondary indices and projections make sense only for MergeTree family of storage engines. /// We should not copy them for other storages. if (create.storage && endsWith(create.storage->engine->name, "MergeTree")) + { properties.indices = as_storage_metadata->getSecondaryIndices(); + properties.projections = as_storage_metadata->getProjections().clone(); + } properties.constraints = as_storage_metadata->getConstraints(); } @@ -910,7 +913,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) } /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. 
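// A compact model of the explicit-stack traversal used by getIdentifiers()
// above: iterate instead of recursing, so deeply nested expressions cannot
// overflow the call stack. Node is a toy stand-in for an AST node; like the
// original, only children of the starting node are inspected.

#include <memory>
#include <optional>
#include <stack>
#include <string>
#include <unordered_set>
#include <vector>

struct Node
{
    std::optional<std::string> identifier;           // set when the node is an identifier
    std::vector<std::shared_ptr<Node>> children;
};
using NodePtr = std::shared_ptr<Node>;

std::unordered_set<std::string> collectIdentifiers(const NodePtr & root)
{
    std::unordered_set<std::string> identifiers;
    std::stack<NodePtr> to_process;
    to_process.push(root);

    while (!to_process.empty())
    {
        auto node = to_process.top();
        to_process.pop();

        for (const auto & child : node->children)
        {
            if (child->identifier)
                identifiers.insert(*child->identifier);
            to_process.push(child);
        }
    }
    return identifiers;
}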
- TableProperties properties = setProperties(create); + TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); DatabasePtr database; bool need_add_to_database = !create.temporary; diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 7bd3ef25746..92f2929ea7b 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -74,7 +74,7 @@ private: BlockIO createTable(ASTCreateQuery & create); /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. - TableProperties setProperties(ASTCreateQuery & create) const; + TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp new file mode 100644 index 00000000000..9f945c11aac --- /dev/null +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +BlockIO InterpreterDropFunctionQuery::execute() +{ + auto current_context = getContext(); + current_context->checkAccess(AccessType::DROP_FUNCTION); + + FunctionNameNormalizer().visit(query_ptr.get()); + auto & drop_function_query = query_ptr->as(); + + UserDefinedFunctionFactory::instance().unregisterFunction(drop_function_query.function_name); + UserDefinedObjectsLoader::instance().removeObject(current_context, UserDefinedObjectType::Function, drop_function_query.function_name); + + return {}; +} + +} diff --git a/src/Interpreters/InterpreterDropFunctionQuery.h b/src/Interpreters/InterpreterDropFunctionQuery.h new file mode 100644 index 00000000000..5842851f5db --- /dev/null +++ b/src/Interpreters/InterpreterDropFunctionQuery.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace DB +{ + +class Context; + +class InterpreterDropFunctionQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterDropFunctionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index a50a6279873..54307ae848b 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -36,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -272,6 +276,14 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context, false /*is_internal*/); + } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 4e5e3b4e86b..83af913c7ab 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -156,7 +156,7 @@ 
ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_ ColumnDependencies dependencies; while (!new_updated_columns.empty()) { - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true); new_updated_columns.clear(); for (const auto & dependency : new_dependencies) { @@ -303,6 +303,15 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt return key_columns; } +static bool materializeTTLRecalculateOnly(const StoragePtr & storage) +{ + auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); + if (!storage_from_merge_tree_data_part) + return false; + + return storage_from_merge_tree_data_part->materializeTTLRecalculateOnly(); +} + static void validateUpdateColumns( const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, @@ -394,8 +403,13 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) NamesAndTypesList all_columns = columns_desc.getAllPhysical(); NameSet updated_columns; + bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); for (const MutationCommand & command : commands) { + if (command.type == MutationCommand::Type::UPDATE + || command.type == MutationCommand::Type::DELETE) + materialize_ttl_recalculate_only = false; + for (const auto & kv : command.column_to_update_expression) { updated_columns.insert(kv.first); @@ -569,7 +583,18 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::MATERIALIZE_TTL) { mutation_kind.set(MutationKind::MUTATE_OTHER); - if (metadata_snapshot->hasRowsTTL()) + if (materialize_ttl_recalculate_only) + { + // just recalculate ttl_infos without remove expired data + auto all_columns_vec = all_columns.getNames(); + auto new_dependencies = metadata_snapshot->getColumnDependencies(NameSet(all_columns_vec.begin(), all_columns_vec.end()), false); + for (const auto & dependency : new_dependencies) + { + if (dependency.kind == ColumnDependency::TTL_EXPRESSION) + dependencies.insert(dependency); + } + } + else if (metadata_snapshot->hasRowsTTL()) { for (const auto & column : all_columns) dependencies.emplace(column.name, ColumnDependency::TTL_TARGET); @@ -594,19 +619,19 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } /// Recalc only skip indices and projections of columns which could be updated by TTL. - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true); for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION) dependencies.insert(dependency); } + } - if (dependencies.empty()) - { - /// Very rare case. It can happen if we have only one MOVE TTL with constant expression. - /// But we still have to read at least one column. - dependencies.emplace(all_columns.front().name, ColumnDependency::TTL_EXPRESSION); - } + if (dependencies.empty()) + { + /// Very rare case. It can happen if we have only one MOVE TTL with constant expression. + /// But we still have to read at least one column. 
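// The decision made in MutationsInterpreter::prepare() above, reduced to its
// core: TTL infos may be recalculated in place only when the storage supports
// it and no command in the mutation rewrites rows. Types here are simplified
// stand-ins for the real MutationCommand.

#include <vector>

enum class MutationType { UPDATE, DELETE, MATERIALIZE_TTL, READ_COLUMN };

bool canRecalculateTTLOnly(bool storage_supports_recalculate_only,
                           const std::vector<MutationType> & commands)
{
    if (!storage_supports_recalculate_only)
        return false;
    for (auto type : commands)
        if (type == MutationType::UPDATE || type == MutationType::DELETE)
            return false;                    // a row rewrite forces full TTL materialization
    return true;
}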
+ dependencies.emplace(all_columns.front().name, ColumnDependency::TTL_EXPRESSION); } } else if (command.type == MutationCommand::READ_COLUMN) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a1b74fcd7a6..5a9e2bfbd17 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include /// getSmallestColumn() #include @@ -1045,6 +1046,9 @@ TreeRewriterResultPtr TreeRewriter::analyze( void TreeRewriter::normalize( ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases) { + UserDefinedFunctionsVisitor::Data data_user_defined_functions_visitor; + UserDefinedFunctionsVisitor(data_user_defined_functions_visitor).visit(query); + CustomizeCountDistinctVisitor::Data data_count_distinct{settings.count_distinct_implementation}; CustomizeCountDistinctVisitor(data_count_distinct).visit(query); diff --git a/src/Interpreters/UserDefinedFunctionFactory.cpp b/src/Interpreters/UserDefinedFunctionFactory.cpp new file mode 100644 index 00000000000..bcdf4c5ff88 --- /dev/null +++ b/src/Interpreters/UserDefinedFunctionFactory.cpp @@ -0,0 +1,93 @@ +#include "UserDefinedFunctionFactory.h" + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; + extern const int CANNOT_DROP_SYSTEM_FUNCTION; +} + +UserDefinedFunctionFactory & UserDefinedFunctionFactory::instance() +{ + static UserDefinedFunctionFactory result; + return result; +} + +void UserDefinedFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query) +{ + if (FunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); + + if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); + + std::lock_guard lock(mutex); + + auto [_, inserted] = function_name_to_create_query.emplace(function_name, std::move(create_function_query)); + if (!inserted) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, + "The function name '{}' is not unique", + function_name); +} + +void UserDefinedFunctionFactory::unregisterFunction(const String & function_name) +{ + if (FunctionFactory::instance().hasNameOrAlias(function_name) || + AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::CANNOT_DROP_SYSTEM_FUNCTION, "Cannot drop system function '{}'", function_name); + + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query.find(function_name); + if (it == function_name_to_create_query.end()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "The function name '{}' is not registered", + function_name); + + function_name_to_create_query.erase(it); +} + +ASTPtr UserDefinedFunctionFactory::get(const String & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query.find(function_name); + if (it == function_name_to_create_query.end()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "The function name '{}' is not registered", + function_name); + + return it->second; +} + +ASTPtr UserDefinedFunctionFactory::tryGet(const std::string & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = 
function_name_to_create_query.find(function_name);
+    if (it == function_name_to_create_query.end())
+        return nullptr;
+
+    return it->second;
+}
+
+std::vector<String> UserDefinedFunctionFactory::getAllRegisteredNames() const
+{
+    std::vector<String> registered_names;
+    registered_names.reserve(function_name_to_create_query.size());
+
+    std::lock_guard lock(mutex);
+
+    for (const auto & [name, _] : function_name_to_create_query)
+        registered_names.emplace_back(name);
+
+    return registered_names;
+}
+
+}
diff --git a/src/Interpreters/UserDefinedFunctionFactory.h b/src/Interpreters/UserDefinedFunctionFactory.h
new file mode 100644
index 00000000000..b479d629532
--- /dev/null
+++ b/src/Interpreters/UserDefinedFunctionFactory.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include
+#include
+
+#include
+
+#include
+
+namespace DB
+{
+
+class UserDefinedFunctionFactory : public IHints<1, UserDefinedFunctionFactory>
+{
+public:
+    static UserDefinedFunctionFactory & instance();
+
+    void registerFunction(const String & function_name, ASTPtr create_function_query);
+
+    void unregisterFunction(const String & function_name);
+
+    ASTPtr get(const String & function_name) const;
+
+    ASTPtr tryGet(const String & function_name) const;
+
+    std::vector<String> getAllRegisteredNames() const override;
+
+private:
+    std::unordered_map<String, ASTPtr> function_name_to_create_query;
+    mutable std::mutex mutex;
+};
+
+}
diff --git a/src/Interpreters/UserDefinedFunctionsVisitor.cpp b/src/Interpreters/UserDefinedFunctionsVisitor.cpp
new file mode 100644
index 00000000000..2a793d17817
--- /dev/null
+++ b/src/Interpreters/UserDefinedFunctionsVisitor.cpp
@@ -0,0 +1,99 @@
+#include "UserDefinedFunctionsVisitor.h"
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNSUPPORTED_METHOD;
+}
+
+void UserDefinedFunctionsMatcher::visit(ASTPtr & ast, Data &)
+{
+    auto * function = ast->as<ASTFunction>();
+    if (!function)
+        return;
+
+    auto result = tryToReplaceFunction(*function);
+    if (result)
+        ast = result;
+}
+
+bool UserDefinedFunctionsMatcher::needChildVisit(const ASTPtr &, const ASTPtr &)
+{
+    return true;
+}
+
+ASTPtr UserDefinedFunctionsMatcher::tryToReplaceFunction(const ASTFunction & function)
+{
+    auto user_defined_function = UserDefinedFunctionFactory::instance().tryGet(function.name);
+    if (!user_defined_function)
+        return nullptr;
+
+    const auto & function_arguments_list = function.children.at(0)->as<ASTExpressionList>();
+    auto & function_arguments = function_arguments_list->children;
+
+    const auto & create_function_query = user_defined_function->as<ASTCreateFunctionQuery>();
+    auto & function_core_expression = create_function_query->function_core->children.at(0);
+
+    const auto & identifiers_expression_list = function_core_expression->children.at(0)->children.at(0)->as<ASTExpressionList>();
+    const auto & identifiers_raw = identifiers_expression_list->children;
+
+    if (function_arguments.size() != identifiers_raw.size())
+        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
+            "Function {} expects {} arguments, but {} were provided",
+            create_function_query->function_name,
+            identifiers_raw.size(),
+            function_arguments.size());
+
+    std::unordered_map<String, ASTPtr> identifier_name_to_function_argument;
+
+    for (size_t parameter_index = 0; parameter_index < identifiers_raw.size(); ++parameter_index)
+    {
+        const auto & identifier = identifiers_raw[parameter_index]->as<ASTIdentifier>();
+        const auto & function_argument = function_arguments[parameter_index];
+        const auto & identifier_name = identifier->name();
+
+        identifier_name_to_function_argument.emplace(identifier_name, function_argument);
+    }
+
+    auto function_body_to_update = function_core_expression->children.at(1)->clone();
+
+    std::stack<ASTPtr> ast_nodes_to_update;
+    ast_nodes_to_update.push(function_body_to_update);
+
+    while (!ast_nodes_to_update.empty())
+    {
+        auto ast_node_to_update = ast_nodes_to_update.top();
+        ast_nodes_to_update.pop();
+
+        for (auto & child : ast_node_to_update->children)
+        {
+            auto identifier_name_opt = tryGetIdentifierName(child);
+            if (identifier_name_opt)
+            {
+                auto function_argument_it = identifier_name_to_function_argument.find(*identifier_name_opt);
+                assert(function_argument_it != identifier_name_to_function_argument.end());
+
+                child = function_argument_it->second->clone();
+                continue;
+            }
+
+            ast_nodes_to_update.push(child);
+        }
+    }
+
+    return function_body_to_update;
+}
+
+}
diff --git a/src/Interpreters/UserDefinedFunctionsVisitor.h b/src/Interpreters/UserDefinedFunctionsVisitor.h
new file mode 100644
index 00000000000..c24e0327ddc
--- /dev/null
+++ b/src/Interpreters/UserDefinedFunctionsVisitor.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ASTFunction;
+
+/** Visits ASTFunction nodes and, if the node is a user-defined function, replaces it with the function body.
+  * Example:
+  *
+  * CREATE FUNCTION test_function AS a -> a + 1;
+  *
+  * Before applying visitor:
+  * SELECT test_function(number) FROM system.numbers LIMIT 10;
+  *
+  * After applying visitor:
+  * SELECT number + 1 FROM system.numbers LIMIT 10;
+  */
+class UserDefinedFunctionsMatcher
+{
+public:
+    using Visitor = InDepthNodeVisitor;
+
+    struct Data
+    {
+    };
+
+    static void visit(ASTPtr & ast, Data & data);
+    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
+
+private:
+    static void visit(ASTFunction & func, const Data & data);
+
+    static ASTPtr tryToReplaceFunction(const ASTFunction & function);
+
+};
+
+/// Visitor that replaces calls to user-defined functions with their bodies.
+using UserDefinedFunctionsVisitor = UserDefinedFunctionsMatcher::Visitor;
+
+}
diff --git a/src/Interpreters/UserDefinedObjectsLoader.cpp b/src/Interpreters/UserDefinedObjectsLoader.cpp
new file mode 100644
index 00000000000..5237de594f4
--- /dev/null
+++ b/src/Interpreters/UserDefinedObjectsLoader.cpp
@@ -0,0 +1,164 @@
+#include "UserDefinedObjectsLoader.h"
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int OBJECT_ALREADY_STORED_ON_DISK;
+    extern const int OBJECT_WAS_NOT_STORED_ON_DISK;
+}
+
+UserDefinedObjectsLoader & UserDefinedObjectsLoader::instance()
+{
+    static UserDefinedObjectsLoader ret;
+    return ret;
+}
+
+UserDefinedObjectsLoader::UserDefinedObjectsLoader()
+    : log(&Poco::Logger::get("UserDefinedObjectsLoader"))
+{}
+
+void UserDefinedObjectsLoader::loadUserDefinedObject(ContextPtr context, UserDefinedObjectType object_type, const std::string_view & name, const String & path)
+{
+    auto name_ref = StringRef(name.data(), name.size());
+    LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(name_ref), path);
+
+    /// There is a .sql file with the user-defined object creation statement.
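// A toy model of the substitution performed by tryToReplaceFunction() above:
// clone the stored lambda body, then replace each parameter identifier with
// the call-site argument expression. Expr is an illustrative stand-in for an
// AST node; inlineCall is a hypothetical name, not ClickHouse API.

#include <map>
#include <memory>
#include <string>
#include <vector>

struct Expr
{
    std::string name;                                 // identifier or function name
    std::vector<std::shared_ptr<Expr>> children;

    std::shared_ptr<Expr> clone() const
    {
        auto copy = std::make_shared<Expr>(Expr{name, {}});
        for (const auto & child : children)
            copy->children.push_back(child->clone());
        return copy;
    }
};
using ExprPtr = std::shared_ptr<Expr>;

ExprPtr inlineCall(const ExprPtr & body, const std::map<std::string, ExprPtr> & param_to_arg)
{
    auto result = body->clone();
    std::vector<ExprPtr *> to_visit{&result};         // pointers allow in-place replacement

    while (!to_visit.empty())
    {
        ExprPtr * node = to_visit.back();
        to_visit.pop_back();

        auto it = param_to_arg.find((*node)->name);
        if ((*node)->children.empty() && it != param_to_arg.end())
        {
            *node = it->second->clone();              // substitute the argument expression
            continue;                                 // don't descend into the argument
        }
        for (auto & child : (*node)->children)
            to_visit.push_back(&child);
    }
    return result;
}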
+ ReadBufferFromFile in(path); + + String object_create_query; + readStringUntilEOF(object_create_query, in); + + try + { + switch (object_type) + { + case UserDefinedObjectType::Function: + { + ParserCreateFunctionQuery parser; + ASTPtr ast = parseQuery( + parser, + object_create_query.data(), + object_create_query.data() + object_create_query.size(), + "in file " + path, + 0, + context->getSettingsRef().max_parser_depth); + + InterpreterCreateFunctionQuery interpreter(ast, context, true /*is internal*/); + interpreter.execute(); + } + } + } + catch (Exception & e) + { + e.addMessage(fmt::format("while loading user defined objects {} from path {}", backQuote(name_ref), path)); + throw; + } +} + +void UserDefinedObjectsLoader::loadObjects(ContextPtr context) +{ + LOG_DEBUG(log, "loading user defined objects"); + + String dir_path = context->getPath() + "user_defined/"; + Poco::DirectoryIterator dir_end; + for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it) + { + if (it->isLink()) + continue; + + const auto & file_name = it.name(); + + /// For '.svn', '.gitignore' directory and similar. + if (file_name.at(0) == '.') + continue; + + if (!it->isDirectory() && endsWith(file_name, ".sql")) + { + std::string_view object_name = file_name; + object_name.remove_suffix(strlen(".sql")); + object_name.remove_prefix(strlen("function_")); + loadUserDefinedObject(context, UserDefinedObjectType::Function, object_name, dir_path + it.name()); + } + } +} + +void UserDefinedObjectsLoader::storeObject(ContextPtr context, UserDefinedObjectType object_type, const String & object_name, const IAST & ast) +{ + String dir_path = context->getPath() + "user_defined/"; + String file_path; + + switch (object_type) + { + case UserDefinedObjectType::Function: + { + file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; + } + } + + if (std::filesystem::exists(file_path)) + throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path)); + + LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path); + + WriteBufferFromOwnString create_statement_buf; + formatAST(ast, create_statement_buf, false); + writeChar('\n', create_statement_buf); + + String create_statement = create_statement_buf.str(); + WriteBufferFromFile out(file_path, create_statement.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(create_statement, out); + out.next(); + if (context->getSettingsRef().fsync_metadata) + out.sync(); + out.close(); + + LOG_DEBUG(log, "Stored object {}", backQuote(object_name)); +} + +void UserDefinedObjectsLoader::removeObject(ContextPtr context, UserDefinedObjectType object_type, const String & object_name) +{ + String dir_path = context->getPath() + "user_defined/"; + LOG_DEBUG(log, "Removing file for user defined object {} from {}", backQuote(object_name), dir_path); + + std::filesystem::path file_path; + + switch (object_type) + { + case UserDefinedObjectType::Function: + { + file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; + } + } + + if (!std::filesystem::exists(file_path)) + throw Exception(ErrorCodes::OBJECT_WAS_NOT_STORED_ON_DISK, "User defined object {} was not stored on disk", backQuote(file_path.string())); + + std::filesystem::remove(file_path); +} + +} diff --git a/src/Interpreters/UserDefinedObjectsLoader.h b/src/Interpreters/UserDefinedObjectsLoader.h new file mode 100644 index 00000000000..f0054fac450 --- /dev/null +++ 
b/src/Interpreters/UserDefinedObjectsLoader.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +#include + + +namespace DB +{ + +enum class UserDefinedObjectType +{ + Function +}; + +class UserDefinedObjectsLoader : private boost::noncopyable +{ +public: + static UserDefinedObjectsLoader & instance(); + UserDefinedObjectsLoader(); + + void loadObjects(ContextPtr context); + void storeObject(ContextPtr context, UserDefinedObjectType object_type, const String & object_name, const IAST & ast); + void removeObject(ContextPtr context, UserDefinedObjectType object_type, const String & object_name); + +private: + + void loadUserDefinedObject(ContextPtr context, UserDefinedObjectType object_type, const std::string_view & object_name, const String & file_path); + Poco::Logger * log; +}; + +} diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index c0816bb671c..119e3cc5322 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -71,6 +71,7 @@ SRCS( InternalTextLogsQueue.cpp InterpreterAlterQuery.cpp InterpreterCheckQuery.cpp + InterpreterCreateFunctionQuery.cpp InterpreterCreateQuery.cpp InterpreterCreateQuotaQuery.cpp InterpreterCreateRoleQuery.cpp @@ -79,6 +80,7 @@ SRCS( InterpreterCreateUserQuery.cpp InterpreterDescribeQuery.cpp InterpreterDropAccessEntityQuery.cpp + InterpreterDropFunctionQuery.cpp InterpreterDropQuery.cpp InterpreterExistsQuery.cpp InterpreterExplainQuery.cpp @@ -89,6 +91,7 @@ SRCS( InterpreterKillQueryQuery.cpp InterpreterOptimizeQuery.cpp InterpreterRenameQuery.cpp + InterpreterSelectIntersectExceptQuery.cpp InterpreterSelectQuery.cpp InterpreterSelectWithUnionQuery.cpp InterpreterSetQuery.cpp @@ -142,6 +145,7 @@ SRCS( RewriteFunctionToSubcolumnVisitor.cpp RewriteSumIfFunctionVisitor.cpp RowRefs.cpp + SelectIntersectExceptQueryVisitor.cpp Set.cpp SetVariants.cpp SortedBlocksWriter.cpp @@ -157,6 +161,9 @@ SRCS( TranslateQualifiedNamesVisitor.cpp TreeOptimizer.cpp TreeRewriter.cpp + UserDefinedFunctionFactory.cpp + UserDefinedFunctionsVisitor.cpp + UserDefinedObjectsLoader.cpp WindowDescription.cpp ZooKeeperLog.cpp addMissingDefaults.cpp diff --git a/src/Parsers/ASTCreateFunctionQuery.cpp b/src/Parsers/ASTCreateFunctionQuery.cpp new file mode 100644 index 00000000000..0b3991ddc44 --- /dev/null +++ b/src/Parsers/ASTCreateFunctionQuery.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +namespace DB +{ + +ASTPtr ASTCreateFunctionQuery::clone() const +{ + return std::make_shared(*this); +} + +void ASTCreateFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE FUNCTION " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? 
hilite_none : "");
+    function_core->formatImpl(settings, state, frame);
+}
+
+}
diff --git a/src/Parsers/ASTCreateFunctionQuery.h b/src/Parsers/ASTCreateFunctionQuery.h
new file mode 100644
index 00000000000..3adddad8fbd
--- /dev/null
+++ b/src/Parsers/ASTCreateFunctionQuery.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ASTCreateFunctionQuery : public IAST
+{
+public:
+    String function_name;
+    ASTPtr function_core;
+
+    String getID(char) const override { return "CreateFunctionQuery"; }
+
+    ASTPtr clone() const override;
+
+    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
+};
+
+}
diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp
new file mode 100644
index 00000000000..5800a7ba9cb
--- /dev/null
+++ b/src/Parsers/ASTDropFunctionQuery.cpp
@@ -0,0 +1,19 @@
+#include
+#include
+#include
+
+namespace DB
+{
+
+ASTPtr ASTDropFunctionQuery::clone() const
+{
+    return std::make_shared<ASTDropFunctionQuery>(*this);
+}
+
+void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
+{
+    settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : "");
+    settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : "");
+}
+
+}
diff --git a/src/Parsers/ASTDropFunctionQuery.h b/src/Parsers/ASTDropFunctionQuery.h
new file mode 100644
index 00000000000..e32bf93a64d
--- /dev/null
+++ b/src/Parsers/ASTDropFunctionQuery.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "IAST.h"
+
+namespace DB
+{
+
+class ASTDropFunctionQuery : public IAST
+{
+public:
+    String function_name;
+
+    String getID(char) const override { return "DropFunctionQuery"; }
+
+    ASTPtr clone() const override;
+
+    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
+};
+
+}
diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp
index 1ff27c61836..6a3e9d19484 100644
--- a/src/Parsers/ASTFunction.cpp
+++ b/src/Parsers/ASTFunction.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 
 
 namespace DB
@@ -21,6 +22,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNEXPECTED_EXPRESSION;
+    extern const int UNEXPECTED_AST_STRUCTURE;
 }
 
 void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
@@ -557,4 +559,33 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
     }
 }
 
+String getFunctionName(const IAST * ast)
+{
+    String res;
+    if (tryGetFunctionNameInto(ast, res))
+        return res;
+    throw Exception(ast ? queryToString(*ast) + " is not a function" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
+}
+
+std::optional<String> tryGetFunctionName(const IAST * ast)
+{
+    String res;
+    if (tryGetFunctionNameInto(ast, res))
+        return res;
+    return {};
+}
+
+bool tryGetFunctionNameInto(const IAST * ast, String & name)
+{
+    if (ast)
+    {
+        if (const auto * node = ast->as<ASTFunction>())
+        {
+            name = node->name;
+            return true;
+        }
+    }
+    return false;
+}
+
 }
diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h
index 685aaaadd26..ec49ee180ba 100644
--- a/src/Parsers/ASTFunction.h
+++ b/src/Parsers/ASTFunction.h
@@ -71,4 +71,14 @@ std::shared_ptr<ASTFunction> makeASTFunction(const String & name, Args &&... arg
     return function;
 }
 
+/// ASTFunction helpers: hide casts and semantics.
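// The accessor triple added above (getFunctionName / tryGetFunctionName /
// tryGetFunctionNameInto) follows a common pattern: one out-parameter core,
// one optional-returning wrapper, one throwing wrapper. A generic sketch with
// simplified stand-in types:

#include <optional>
#include <stdexcept>
#include <string>

struct IAST { virtual ~IAST() = default; };
struct ASTFunction : IAST { std::string name; };

bool tryGetNameInto(const IAST * ast, std::string & name)
{
    if (const auto * func = dynamic_cast<const ASTFunction *>(ast))
    {
        name = func->name;                   // the single place that performs the cast
        return true;
    }
    return false;
}

std::optional<std::string> tryGetName(const IAST * ast)
{
    std::string res;
    if (tryGetNameInto(ast, res))
        return res;
    return std::nullopt;
}

std::string getName(const IAST * ast)
{
    std::string res;
    if (tryGetNameInto(ast, res))
        return res;
    throw std::runtime_error("AST node is not a function");
}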
+ +String getFunctionName(const IAST * ast); +std::optional tryGetFunctionName(const IAST * ast); +bool tryGetFunctionNameInto(const IAST * ast, String & name); + +inline String getFunctionName(const ASTPtr & ast) { return getFunctionName(ast.get()); } +inline std::optional tryGetFunctionName(const ASTPtr & ast) { return tryGetFunctionName(ast.get()); } +inline bool tryGetFunctionNameInto(const ASTPtr & ast, String & name) { return tryGetFunctionNameInto(ast.get(), name); } + } diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp new file mode 100644 index 00000000000..fbfd02415e7 --- /dev/null +++ b/src/Parsers/ParserCreateFunctionQuery.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_create("CREATE"); + ParserKeyword s_function("FUNCTION"); + ParserIdentifier function_name_p; + ParserKeyword s_as("AS"); + ParserLambdaExpression lambda_p; + + ASTPtr function_name; + ASTPtr function_core; + + if (!s_create.ignore(pos, expected)) + return false; + + if (!s_function.ignore(pos, expected)) + return false; + + if (!function_name_p.parse(pos, function_name, expected)) + return false; + + if (!s_as.ignore(pos, expected)) + return false; + + if (!lambda_p.parse(pos, function_core, expected)) + return false; + + auto create_function_query = std::make_shared(); + node = create_function_query; + + create_function_query->function_name = function_name->as().name(); + create_function_query->function_core = function_core; + + return true; +} + +} diff --git a/src/Parsers/ParserCreateFunctionQuery.h b/src/Parsers/ParserCreateFunctionQuery.h new file mode 100644 index 00000000000..aac643b995d --- /dev/null +++ b/src/Parsers/ParserCreateFunctionQuery.h @@ -0,0 +1,16 @@ +#pragma once + +#include "IParserBase.h" + +namespace DB +{ + +/// CREATE FUNCTION test AS x -> x || '1' +class ParserCreateFunctionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "CREATE FUNCTION query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/ParserDropFunctionQuery.cpp b/src/Parsers/ParserDropFunctionQuery.cpp new file mode 100644 index 00000000000..04d26109836 --- /dev/null +++ b/src/Parsers/ParserDropFunctionQuery.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_function("FUNCTION"); + ParserIdentifier function_name_p; + + ASTPtr function_name; + + if (!s_drop.ignore(pos, expected)) + return false; + + if (!s_function.ignore(pos, expected)) + return false; + + if (!function_name_p.parse(pos, function_name, expected)) + return false; + + auto drop_function_query = std::make_shared(); + node = drop_function_query; + + drop_function_query->function_name = function_name->as().name(); + + return true; +} + +} diff --git a/src/Parsers/ParserDropFunctionQuery.h b/src/Parsers/ParserDropFunctionQuery.h new file mode 100644 index 00000000000..03602c7ae96 --- /dev/null +++ b/src/Parsers/ParserDropFunctionQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include "IParserBase.h" + +namespace DB +{ +/// DROP FUNCTION function1 +class ParserDropFunctionQuery : public IParserBase +{ +protected: + const char * getName() const override 
{ return "DROP FUNCTION query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 3cc6b530d7c..345013b6475 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +39,8 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCreateQuotaQuery create_quota_p; ParserCreateRowPolicyQuery create_row_policy_p; ParserCreateSettingsProfileQuery create_settings_profile_p; + ParserCreateFunctionQuery create_function_p; + ParserDropFunctionQuery drop_function_p; ParserDropAccessEntityQuery drop_access_entity_p; ParserGrantQuery grant_p; ParserSetRoleQuery set_role_p; @@ -54,6 +58,8 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || create_quota_p.parse(pos, node, expected) || create_row_policy_p.parse(pos, node, expected) || create_settings_profile_p.parse(pos, node, expected) + || create_function_p.parse(pos, node, expected) + || drop_function_p.parse(pos, node, expected) || drop_access_entity_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) diff --git a/src/Parsers/getInsertQuery.cpp b/src/Parsers/getInsertQuery.cpp new file mode 100644 index 00000000000..6f52056dfe2 --- /dev/null +++ b/src/Parsers/getInsertQuery.cpp @@ -0,0 +1,28 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ +std::string getInsertQuery(const std::string & db_name, const std::string & table_name, const ColumnsWithTypeAndName & columns, IdentifierQuotingStyle quoting) +{ + ASTInsertQuery query; + query.table_id.database_name = db_name; + query.table_id.table_name = table_name; + query.columns = std::make_shared(','); + query.children.push_back(query.columns); + for (const auto & column : columns) + query.columns->children.emplace_back(std::make_shared(column.name)); + + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true); + settings.always_quote_identifiers = true; + settings.identifier_quoting_style = quoting; + query.IAST::format(settings); + return buf.str(); +} +} diff --git a/src/Parsers/getInsertQuery.h b/src/Parsers/getInsertQuery.h new file mode 100644 index 00000000000..0bcb5e3660b --- /dev/null +++ b/src/Parsers/getInsertQuery.h @@ -0,0 +1,8 @@ +#pragma once +#include +#include + +namespace DB +{ +std::string getInsertQuery(const std::string & db_name, const std::string & table_name, const ColumnsWithTypeAndName & columns, IdentifierQuotingStyle quoting); +} diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 3b8a9a19bce..e169b812dff 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -15,6 +15,7 @@ SRCS( ASTColumnsMatcher.cpp ASTColumnsTransformers.cpp ASTConstraintDeclaration.cpp + ASTCreateFunctionQuery.cpp ASTCreateQuery.cpp ASTCreateQuotaQuery.cpp ASTCreateRoleQuery.cpp @@ -25,6 +26,7 @@ SRCS( ASTDictionary.cpp ASTDictionaryAttributeDeclaration.cpp ASTDropAccessEntityQuery.cpp + ASTDropFunctionQuery.cpp ASTDropQuery.cpp ASTExpressionList.cpp ASTFunction.cpp @@ -89,6 +91,7 @@ SRCS( ParserAlterQuery.cpp ParserCase.cpp ParserCheckQuery.cpp + ParserCreateFunctionQuery.cpp ParserCreateQuery.cpp ParserCreateQuotaQuery.cpp ParserCreateRoleQuery.cpp @@ -101,6 +104,7 @@ SRCS( ParserDictionary.cpp ParserDictionaryAttributeDeclaration.cpp 
ParserDropAccessEntityQuery.cpp + ParserDropFunctionQuery.cpp ParserDropQuery.cpp ParserExplainQuery.cpp ParserExternalDDLQuery.cpp diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index df40a78749b..7ead4d0d419 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -194,7 +194,7 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc , server(server_) , log(&Poco::Logger::get("NuKeeperTCPHandler")) , global_context(Context::createCopy(server.context())) - , keeper_dispatcher(global_context->getKeeperStorageDispatcher()) + , keeper_dispatcher(global_context->getKeeperDispatcher()) , operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) , session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) , poll_wrapper(std::make_unique(socket_)) diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 76371ed1a0c..7abfb72c846 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,7 @@ private: IServer & server; Poco::Logger * log; ContextPtr global_context; - std::shared_ptr keeper_dispatcher; + std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan session_timeout; int64_t session_id{-1}; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index c91d60c5de7..2f46543b03c 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -287,7 +287,7 @@ static void injectVirtualColumnsImpl( { ColumnPtr column; if (rows) - column = DataTypeUUID().createColumnConst(rows, task->data_part->uuid)->convertToFullColumnIfConst(); + column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); else column = DataTypeUUID().createColumn(); @@ -306,7 +306,7 @@ static void injectVirtualColumnsImpl( else if (virtual_column_name == "_partition_value") { if (rows) - inserter.insertPartitionValueColumn(rows, task->data_part->partition.value, partition_value_type, virtual_column_name); + inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); else inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 764f5d7adf7..bdb3471fb01 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -757,16 +757,20 @@ DataTypePtr MergeTreeData::getPartitionValueType() const } -Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const +Block MergeTreeData::getSampleBlockWithVirtualColumns() const { DataTypePtr partition_value_type = getPartitionValueType(); - bool has_partition_value = typeid_cast(partition_value_type.get()); - Block block{ + return { ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid"), 
ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")}; +} + +Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const +{ + auto block = getSampleBlockWithVirtualColumns(); MutableColumns columns = block.mutateColumns(); auto & part_column = columns[0]; @@ -774,6 +778,7 @@ Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPar auto & part_uuid_column = columns[2]; auto & partition_value_column = columns[3]; + bool has_partition_value = typeid_cast(partition_value_column.get()); for (const auto & part_or_projection : parts) { const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get(); @@ -3465,7 +3470,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector( { for (const auto & part : range) { - for (const auto & [p_name, projection_part] : part->getProjectionParts()) + for (const auto & [_, projection_part] : part->getProjectionParts()) res.push_back(projection_part); } } @@ -4151,6 +4156,10 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (auto * select = query_ptr->as(); select && select->final()) return false; + // Currently projections don't support sampling yet. + if (settings.parallel_replicas_count > 1) + return false; + InterpreterSelectQuery select( query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias()); const auto & analysis_result = select.getAnalysisResult(); @@ -4194,13 +4203,13 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( candidate.remove_where_filter = analysis_result.remove_where_filter; candidate.before_where = analysis_result.before_where->clone(); - required_columns = candidate.before_where->foldActionsByProjection( + auto new_required_columns = candidate.before_where->foldActionsByProjection( required_columns, projection.sample_block_for_keys, candidate.where_column_name); - - if (required_columns.empty()) + if (new_required_columns.empty() && !required_columns.empty()) return false; + required_columns = std::move(new_required_columns); candidate.before_where->addAggregatesViaProjection(aggregates); } @@ -4214,33 +4223,35 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( for (const auto & column : prewhere_actions->getResultColumns()) required_columns.erase(column.name); - // Prewhere_action should not add missing keys. - prewhere_required_columns = prewhere_actions->foldActionsByProjection( - prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); - - if (prewhere_required_columns.empty()) - return false; - candidate.prewhere_info->prewhere_actions = prewhere_actions; + { + // Prewhere_action should not add missing keys. 
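// The repeated shape introduced in getQueryProcessingStageWithAggregateProjection
// above: attempt to fold required columns through a projection and commit the
// result only on success; an empty fold result counts as failure unless the
// input was already empty. Names and types here are simplified stand-ins.

#include <functional>
#include <set>
#include <string>
#include <utility>

using Names = std::set<std::string>;

bool tryFoldRequiredColumns(Names & required, const std::function<Names(const Names &)> & fold)
{
    Names folded = fold(required);
    if (folded.empty() && !required.empty())
        return false;                        // the projection cannot provide these columns
    required = std::move(folded);
    return true;
}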
+ auto new_prewhere_required_columns = prewhere_actions->foldActionsByProjection( + prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) + return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); + candidate.prewhere_info->prewhere_actions = prewhere_actions; + } if (candidate.prewhere_info->row_level_filter) { auto row_level_filter_actions = candidate.prewhere_info->row_level_filter->clone(); - prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( + auto new_prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->row_level_column_name, false); - - if (prewhere_required_columns.empty()) + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); candidate.prewhere_info->row_level_filter = row_level_filter_actions; } if (candidate.prewhere_info->alias_actions) { auto alias_actions = candidate.prewhere_info->alias_actions->clone(); - prewhere_required_columns + auto new_prewhere_required_columns = alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys, {}, false); - - if (prewhere_required_columns.empty()) + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); candidate.prewhere_info->alias_actions = alias_actions; } required_columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end()); @@ -4259,11 +4270,20 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( return match; }; - for (const auto & projection : metadata_snapshot->projections) + auto virtual_block = getSampleBlockWithVirtualColumns(); + auto add_projection_candidate = [&](const ProjectionDescription & projection) { ProjectionCandidate candidate{}; candidate.desc = &projection; + auto sample_block = projection.sample_block; + auto sample_block_for_keys = projection.sample_block_for_keys; + for (const auto & column : virtual_block) + { + sample_block.insertUnique(column); + sample_block_for_keys.insertUnique(column); + } + if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection) { bool match = true; @@ -4271,7 +4291,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( // Let's first check if all aggregates are provided by current projection for (const auto & aggregate : select.getQueryAnalyzer()->aggregates()) { - const auto * column = projection.sample_block.findByName(aggregate.column_name); + const auto * column = sample_block.findByName(aggregate.column_name); if (column) { aggregates.insert(*column); @@ -4284,25 +4304,25 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( } if (!match) - continue; + return; // Check if all aggregation keys can be either provided by some action, or by current // projection directly. Reshape the `before_aggregation` action DAG so that it only - // needs to provide aggregation keys, and certain children DAG might be substituted by - // some keys in projection. + // needs to provide aggregation keys, and the DAG of certain child might be substituted + // by some keys in projection. 
candidate.before_aggregation = analysis_result.before_aggregation->clone(); - auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, projection.sample_block_for_keys); + auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, sample_block_for_keys); // TODO Let's find out the exact required_columns for keys. if (required_columns.empty() && (!keys.empty() && !candidate.before_aggregation->getRequiredColumns().empty())) - continue; + return; if (analysis_result.optimize_aggregation_in_order) { for (const auto & key : keys) { auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, projection.sample_block_for_keys); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); } } @@ -4311,7 +4331,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map); candidate.before_aggregation->addAggregatesViaProjection(aggregates); - if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block_for_keys, aggregates)) + if (rewrite_before_where(candidate, projection, required_columns, sample_block_for_keys, aggregates)) { candidate.required_columns = {required_columns.begin(), required_columns.end()}; for (const auto & aggregate : aggregates) @@ -4328,13 +4348,16 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( for (const auto & column : actions->getRequiredColumns()) required_columns.insert(column.name); - if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block, {})) + if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) { candidate.required_columns = {required_columns.begin(), required_columns.end()}; candidates.push_back(std::move(candidate)); } } - } + }; + + for (const auto & projection : metadata_snapshot->projections) + add_projection_candidate(projection); // Let's select the best projection to execute the query. if (!candidates.empty()) @@ -4409,6 +4432,14 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (!selected_candidate) return false; + else if (min_sum_marks == 0) + { + /// If selected_projection indicated an empty result set. Remember it in query_info but + /// don't use projection to run the query, because projection pipeline with empty result + /// set will not work correctly with empty_result_for_aggregation_by_empty_set. + query_info.merge_tree_empty_result = true; + return false; + } if (selected_candidate->desc->type == ProjectionDescription::Type::Aggregate) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 05d1b45a557..ef5f22ed096 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -795,6 +795,9 @@ public: /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. DataTypePtr getPartitionValueType() const; + /// Construct a sample block of virtual columns. + Block getSampleBlockWithVirtualColumns() const; + /// Construct a block consisting only of possible virtual columns for part pruning. /// If one_part is true, fill in at most one part. 
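// ---------------------------------------------------------------------------
// [editor's note] Sketch of the selection policy behind the new min_sum_marks == 0
// branch above (hypothetical types; the real code also weighs the candidates
// against a plain non-projection read). The cheapest candidate wins, but a winner
// that selects zero marks proves the result set is empty, so the code records
// query_info.merge_tree_empty_result instead of running a projection pipeline,
// which would not honor empty_result_for_aggregation_by_empty_set.
#include <cstddef>
#include <vector>

struct Candidate { size_t sum_marks = 0; };

struct Selection
{
    const Candidate * best = nullptr; // candidate to execute, if any
    bool empty_result = false;        // result proven empty; use the normal path
};

Selection selectBestCandidate(const std::vector<Candidate> & candidates)
{
    Selection res;
    size_t min_sum_marks = static_cast<size_t>(-1);
    for (const auto & candidate : candidates)
    {
        if (candidate.sum_marks < min_sum_marks)
        {
            min_sum_marks = candidate.sum_marks;
            res.best = &candidate;
        }
    }
    if (res.best && min_sum_marks == 0)
    {
        res.best = nullptr;      // don't build the projection pipeline
        res.empty_result = true; // remembered in SelectQueryInfo (see the later hunks)
    }
    return res;
}
// ---------------------------------------------------------------------------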
Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 84be8012509..00a599af9c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_) future_part_type = std::min(future_part_type, part->getType()); } + /// NOTE: We don't support merging into an in-memory part yet. auto chosen_type = parts_.front()->storage.choosePartTypeOnDisk(sum_bytes_uncompressed, sum_rows); future_part_type = std::min(future_part_type, chosen_type); assign(std::move(parts_), future_part_type); @@ -493,7 +495,6 @@ static void extractMergingAndGatheringColumns( const NamesAndTypesList & storage_columns, const ExpressionActionsPtr & sorting_key_expr, const IndicesDescription & indexes, - const ProjectionsDescription & projections, const MergeTreeData::MergingParams & merging_params, NamesAndTypesList & gathering_columns, Names & gathering_column_names, NamesAndTypesList & merging_columns, Names & merging_column_names) @@ -507,13 +508,6 @@ static void extractMergingAndGatheringColumns( std::inserter(key_columns, key_columns.end())); } - for (const auto & projection : projections) - { - Names projection_columns_vec = projection.required_columns; - std::copy(projection_columns_vec.cbegin(), projection_columns_vec.cend(), - std::inserter(key_columns, key_columns.end())); - } - /// Force sign column for Collapsing mode if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) key_columns.emplace(merging_params.sign_column); @@ -727,7 +721,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor storage_columns, metadata_snapshot->getSortingKey().expression, metadata_snapshot->getSecondaryIndices(), - metadata_snapshot->getProjections(), merging_params, gathering_columns, gathering_column_names, @@ -1288,10 +1281,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor auto mrk_extension = source_part->index_granularity_info.is_adaptive ? 
getAdaptiveMrkExtension(new_data_part->getType()) : getNonAdaptiveMrkExtension(); bool need_sync = needSyncPart(source_part->rows_count, source_part->getBytesOnDisk(), *data_settings); - bool need_remove_expired_values = false; + auto execute_ttl_type = ExecuteTTLType::NONE; - if (in && shouldExecuteTTL(metadata_snapshot, interpreter->getColumnDependencies(), commands_for_part)) - need_remove_expired_values = true; + if (in) + execute_ttl_type = shouldExecuteTTL(metadata_snapshot, interpreter->getColumnDependencies()); /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data @@ -1319,7 +1312,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor time_of_mutation, compression_codec, merge_entry, - need_remove_expired_values, + execute_ttl_type, need_sync, space_reservation, holder, @@ -1356,7 +1349,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info, metadata_snapshot); } - if (need_remove_expired_values) + if (execute_ttl_type != ExecuteTTLType::NONE) files_to_skip.insert("ttl.txt"); disk->createDirectories(new_part_tmp_path); @@ -1416,7 +1409,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor time_of_mutation, compression_codec, merge_entry, - need_remove_expired_values, + execute_ttl_type, need_sync, space_reservation, holder, @@ -1437,7 +1430,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } } - finalizeMutatedPart(source_part, new_data_part, need_remove_expired_values, compression_codec); + finalizeMutatedPart(source_part, new_data_part, execute_ttl_type, compression_codec); } return new_data_part; @@ -1984,21 +1977,22 @@ std::set MergeTreeDataMergerMutator::getProjectionsToRec return projections_to_recalc; } -bool MergeTreeDataMergerMutator::shouldExecuteTTL( - const StorageMetadataPtr & metadata_snapshot, const ColumnDependencies & dependencies, const MutationCommands & commands) +ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadataPtr & metadata_snapshot, const ColumnDependencies & dependencies) { if (!metadata_snapshot->hasAnyTTL()) - return false; + return ExecuteTTLType::NONE; - for (const auto & command : commands) - if (command.type == MutationCommand::MATERIALIZE_TTL) - return true; + bool has_ttl_expression = false; for (const auto & dependency : dependencies) - if (dependency.kind == ColumnDependency::TTL_EXPRESSION || dependency.kind == ColumnDependency::TTL_TARGET) - return true; + { + if (dependency.kind == ColumnDependency::TTL_EXPRESSION) + has_ttl_expression = true; - return false; + if (dependency.kind == ColumnDependency::TTL_TARGET) + return ExecuteTTLType::NORMAL; + } + return has_ttl_expression ? ExecuteTTLType::RECALCULATE : ExecuteTTLType::NONE; } // 1. get projection pipeline and a sink to write parts @@ -2021,10 +2015,19 @@ void MergeTreeDataMergerMutator::writeWithProjections( std::map projection_parts; Block block; std::vector projection_squashes; + const auto & settings = context->getSettingsRef(); for (size_t i = 0, size = projections_to_build.size(); i < size; ++i) { - projection_squashes.emplace_back(65536, 65536 * 256); + // If the parent part is an in-memory part, squash projection output into one block and + // build in-memory projection because we don't support merging into a new in-memory part. 
+ // Otherwise we split the materialization into multiple stages similar to the process of + // an INSERT SELECT query. + if (new_data_part->getType() == MergeTreeDataPartType::IN_MEMORY) + projection_squashes.emplace_back(0, 0); + else + projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } + while (checkOperationIsNotCanceled(merge_entry) && (block = mutating_stream->read())) { if (minmax_idx) @@ -2035,26 +2038,10 @@ void MergeTreeDataMergerMutator::writeWithProjections( for (size_t i = 0, size = projections_to_build.size(); i < size; ++i) { const auto & projection = projections_to_build[i]->projection; - auto in = InterpreterSelectQuery( - projection.query_ast, - context, - Pipe(std::make_shared<SourceFromSingleChunk>(block, Chunk(block.getColumns(), block.rows()))), - SelectQueryOptions{ - projection.type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState}) - .execute() - .getInputStream(); - in = std::make_shared<SquashingBlockInputStream>(in, block.rows(), std::numeric_limits<UInt64>::max()); - in->readPrefix(); - auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add(in->read()); - if (in->read()) - throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR); - in->readSuffix(); + auto projection_block = projection_squashes[i].add(projection.calculate(block, context)); if (projection_block) - { - projection_parts[projection.name].emplace_back( - MergeTreeDataWriter::writeTempProjectionPart(data, log, projection_block, projection, new_data_part.get(), ++block_num)); - } + projection_parts[projection.name].emplace_back(MergeTreeDataWriter::writeTempProjectionPart( + data, log, projection_block, projection, new_data_part.get(), ++block_num)); } merge_entry->rows_written += block.rows(); @@ -2172,7 +2159,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( time_t time_of_mutation, const CompressionCodecPtr & compression_codec, MergeListEntry & merge_entry, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, bool need_sync, const ReservationPtr & space_reservation, TableLockHolder & holder, @@ -2185,9 +2172,12 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( mutating_stream = std::make_shared<MaterializingBlockInputStream>( std::make_shared<ExpressionBlockInputStream>(mutating_stream, data.getPrimaryKeyAndSkipIndicesExpression(metadata_snapshot))); - if (need_remove_expired_values) + if (execute_ttl_type == ExecuteTTLType::NORMAL) mutating_stream = std::make_shared<TTLBlockInputStream>(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); + if (execute_ttl_type == ExecuteTTLType::RECALCULATE) + mutating_stream = std::make_shared<TTLCalcInputStream>(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); + IMergeTreeDataPart::MinMaxIndex minmax_idx; MergedBlockOutputStream out{ @@ -2229,7 +2219,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( time_t time_of_mutation, const CompressionCodecPtr & compression_codec, MergeListEntry & merge_entry, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, bool need_sync, const ReservationPtr & space_reservation, TableLockHolder & holder, @@ -2238,9 +2228,12 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( if (mutating_stream == nullptr) throw Exception("Cannot mutate part columns with uninitialized mutations stream.
It's a bug", ErrorCodes::LOGICAL_ERROR); - if (need_remove_expired_values) + if (execute_ttl_type == ExecuteTTLType::NORMAL) mutating_stream = std::make_shared(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); + if (execute_ttl_type == ExecuteTTLType::RECALCULATE) + mutating_stream = std::make_shared(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); + IMergedBlockOutputStream::WrittenOffsetColumns unused_written_offsets; MergedColumnOnlyOutputStream out( new_data_part, @@ -2279,7 +2272,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( void MergeTreeDataMergerMutator::finalizeMutatedPart( const MergeTreeDataPartPtr & source_part, MergeTreeData::MutableDataPartPtr new_data_part, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, const CompressionCodecPtr & codec) { auto disk = new_data_part->volume->getDisk(); @@ -2293,7 +2286,7 @@ void MergeTreeDataMergerMutator::finalizeMutatedPart( new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); } - if (need_remove_expired_values) + if (execute_ttl_type != ExecuteTTLType::NONE) { /// Write a file with ttl infos in json format. auto out_ttl = disk->writeFile(fs::path(new_data_part->getFullRelativePath()) / "ttl.txt", 4096); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index ca7376d8f3e..3a0041e4a37 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -23,6 +23,13 @@ enum class SelectPartsDecision NOTHING_TO_MERGE = 2, }; +enum class ExecuteTTLType +{ + NONE = 0, + NORMAL = 1, + RECALCULATE= 2, +}; + /// Auxiliary struct holding metainformation for the future merged or mutated part. 
struct FutureMergedMutatedPart { @@ -200,8 +207,7 @@ private: const ProjectionsDescription & all_projections, const MutationCommands & commands_for_removes); - static bool shouldExecuteTTL( - const StorageMetadataPtr & metadata_snapshot, const ColumnDependencies & dependencies, const MutationCommands & commands); + static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snapshot, const ColumnDependencies & dependencies); /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream @@ -242,7 +248,7 @@ private: time_t time_of_mutation, const CompressionCodecPtr & compression_codec, MergeListEntry & merge_entry, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, bool need_sync, const ReservationPtr & space_reservation, TableLockHolder & holder, @@ -260,7 +266,7 @@ private: time_t time_of_mutation, const CompressionCodecPtr & compression_codec, MergeListEntry & merge_entry, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, bool need_sync, const ReservationPtr & space_reservation, TableLockHolder & holder, @@ -271,7 +277,7 @@ private: static void finalizeMutatedPart( const MergeTreeDataPartPtr & source_part, MergeTreeData::MutableDataPartPtr new_data_part, - bool need_remove_expired_values, + ExecuteTTLType execute_ttl_type, const CompressionCodecPtr & codec); public: diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index e929bfc6862..635da7e2ede 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -94,6 +94,42 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec); out.writePrefix(); out.write(block); + const auto & projections = metadata_snapshot->getProjections(); + for (const auto & [projection_name, projection] : projection_parts) + { + if (projections.has(projection_name)) + { + String projection_destination_path = fs::path(destination_path) / (projection_name + ".proj"); + if (disk->exists(projection_destination_path)) + { + throw Exception( + ErrorCodes::DIRECTORY_ALREADY_EXISTS, + "Could not flush projection part {}.
Projection part in {} already exists", + projection_name, + fullPath(disk, projection_destination_path)); + } + + auto projection_part = asInMemoryPart(projection); + auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count); + MergeTreePartInfo projection_info("all", 0, 0, 0); + auto projection_data_part + = storage.createPart(projection_name, projection_type, projection_info, volume, projection_name + ".proj", parent_part); + projection_data_part->is_temp = false; // clean up will be done on parent part + projection_data_part->setColumns(projection->getColumns()); + + disk->createDirectories(projection_destination_path); + const auto & desc = projections.get(name); + auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); + auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); + MergedBlockOutputStream projection_out( + projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec); + projection_out.writePrefix(); + projection_out.write(projection_part->block); + projection_out.writeSuffixAndFinalizePart(projection_data_part); + new_data_part->addProjectionPart(projection_name, std::move(projection_data_part)); + } + } + out.writeSuffixAndFinalizePart(new_data_part); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f5c1890154a..004eaa6254c 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -132,6 +132,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read) const { + if (query_info.merge_tree_empty_result) + return std::make_unique(); + const auto & settings = context->getSettingsRef(); if (!query_info.projection) { @@ -181,7 +184,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_numbers_to_read, query_info.projection->merge_tree_projection_select_result_ptr); - if (plan) + if (plan->isInitialized()) { // If `before_where` is not empty, transform input blocks by adding needed columns // originated from key columns. 
We already project the block at the end, using @@ -237,7 +240,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( ordinary_query_plan.addStep(std::move(where_step)); } - ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline); + ordinary_pipe = ordinary_query_plan.convertToPipe( + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); } if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) @@ -351,12 +355,14 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( pipes.emplace_back(std::move(projection_pipe)); pipes.emplace_back(std::move(ordinary_pipe)); auto pipe = Pipe::unitePipes(std::move(pipes)); - pipe.resize(1); + auto plan = std::make_unique(); + if (pipe.empty()) + return plan; + pipe.resize(1); auto step = std::make_unique( std::move(pipe), fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name)); - auto plan = std::make_unique(); plan->addStep(std::move(step)); return plan; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 0b05650b42c..180c18ed1b5 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -386,31 +386,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( sync_guard = disk->getDirectorySyncGuard(full_path); } - if (metadata_snapshot->hasProjections()) - { - for (const auto & projection : metadata_snapshot->getProjections()) - { - auto in = InterpreterSelectQuery( - projection.query_ast, - context, - Pipe(std::make_shared(block, Chunk(block.getColumns(), block.rows()))), - SelectQueryOptions{ - projection.type == ProjectionDescription::Type::Normal ? 
QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState}) - .execute() - .getInputStream(); - in = std::make_shared(in, block.rows(), std::numeric_limits::max()); - in->readPrefix(); - auto projection_block = in->read(); - if (in->read()) - throw Exception("Projection cannot grow block rows", ErrorCodes::LOGICAL_ERROR); - in->readSuffix(); - if (projection_block.rows()) - { - new_data_part->addProjectionPart(projection.name, writeProjectionPart(projection_block, projection, new_data_part.get())); - } - } - } - if (metadata_snapshot->hasRowsTTL()) updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); @@ -439,6 +414,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( out.writePrefix(); out.writeWithPermutation(block, perm_ptr); + + for (const auto & projection : metadata_snapshot->getProjections()) + { + auto projection_block = projection.calculate(block, context); + if (projection_block.rows()) + new_data_part->addProjectionPart( + projection.name, writeProjectionPart(data, log, projection_block, projection, new_data_part.get())); + } out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); @@ -449,18 +432,28 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( } MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( - MergeTreeData & data, + const String part_name, + MergeTreeDataPartType part_type, + const String & relative_path, + bool is_temp, + const IMergeTreeDataPart * parent_part, + const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot, - MergeTreeData::MutableDataPartPtr && new_data_part) + const StorageMetadataPtr & metadata_snapshot) { + MergeTreePartInfo new_part_info("all", 0, 0, 0); + auto new_data_part = data.createPart( + part_name, + part_type, + new_part_info, + parent_part->volume, + relative_path, + parent_part); + new_data_part->is_temp = is_temp; + NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); - MergeTreePartition partition{}; - IMergeTreeDataPart::MinMaxIndex minmax_idx{}; new_data_part->setColumns(columns); - new_data_part->partition = std::move(partition); - new_data_part->minmax_idx = std::move(minmax_idx); if (new_data_part->isStoredOnDisk()) { @@ -523,27 +516,41 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterCompressedBytes, new_data_part->getBytesOnDisk()); - return std::move(new_data_part); + return new_data_part; } -MergeTreeData::MutableDataPartPtr -MergeTreeDataWriter::writeProjectionPart(Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part) +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPart( + MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part) { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->volume); - String part_name = projection.name; - MergeTreePartInfo new_part_info("all", 0, 0, 0); - 
auto new_data_part = data.createPart( - part_name, data.choosePartType(expected_size, block.rows()), new_part_info, parent_part->volume, part_name + ".proj", parent_part); - new_data_part->is_temp = false; // clean up will be done on parent part + MergeTreeDataPartType part_type; + if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + { + part_type = MergeTreeDataPartType::IN_MEMORY; + } + else + { + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->volume); + part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + } - return writeProjectionPartImpl(data, log, block, projection.metadata, std::move(new_data_part)); + return writeProjectionPartImpl( + part_name, + part_type, + part_name + ".proj" /* relative_path */, + false /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); } +/// This is used for projection materialization process which may contain multiple stages of +/// projection part merges. MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempProjectionPart( MergeTreeData & data, Poco::Logger * log, @@ -552,24 +559,50 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempProjectionPart( const IMergeTreeDataPart * parent_part, size_t block_num) { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->volume); - String part_name = fmt::format("{}_{}", projection.name, block_num); - MergeTreePartInfo new_part_info("all", 0, 0, 0); - auto new_data_part = data.createPart( + MergeTreeDataPartType part_type; + if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + { + part_type = MergeTreeDataPartType::IN_MEMORY; + } + else + { + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->volume); + part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + } + + return writeProjectionPartImpl( part_name, - data.choosePartType(expected_size, block.rows()), - new_part_info, - parent_part->volume, - "tmp_insert_" + part_name + ".proj", - parent_part); - new_data_part->is_temp = true; // It's part for merge + part_type, + "tmp_insert_" + part_name + ".proj" /* relative_path */, + true /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); +} - return writeProjectionPartImpl(data, log, block, projection.metadata, std::move(new_data_part)); +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeInMemoryProjectionPart( + const MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part) +{ + return writeProjectionPartImpl( + projection.name, + MergeTreeDataPartType::IN_MEMORY, + projection.name + ".proj" /* relative_path */, + false /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index feb2f1e2b12..006f897c3e2 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -49,9 +49,15 @@ public: MergeTreeData::MutableDataPartPtr 
writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context); - MergeTreeData::MutableDataPartPtr writeProjectionPart( - Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part); + /// For insertion. + static MergeTreeData::MutableDataPartPtr writeProjectionPart( + MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part); + /// For mutation: MATERIALIZE PROJECTION. static MergeTreeData::MutableDataPartPtr writeTempProjectionPart( MergeTreeData & data, Poco::Logger * log, @@ -60,15 +66,27 @@ public: const IMergeTreeDataPart * parent_part, size_t block_num); + /// For WriteAheadLog AddPart. + static MergeTreeData::MutableDataPartPtr writeInMemoryProjectionPart( + const MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part); + Block mergeBlock(const Block & block, SortDescription sort_description, Names & partition_key_columns, IColumn::Permutation *& permutation); private: static MergeTreeData::MutableDataPartPtr writeProjectionPartImpl( - MergeTreeData & data, + const String part_name, + MergeTreeDataPartType part_type, + const String & relative_path, + bool is_temp, + const IMergeTreeDataPart * parent_part, + const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot, - MergeTreeData::MutableDataPartPtr && new_data_part); + const StorageMetadataPtr & metadata_snapshot); MergeTreeData & data; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 9a198500447..890cfca8d71 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -117,6 +117,7 @@ struct Settings; M(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \ M(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \ M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ + M(Bool, materialize_ttl_recalculate_only, false, "Only recalculate ttl info when MATERIALIZE TTL", 0) \ M(Bool, write_final_mark, true, "Write final mark after end of column (0 - disabled, do nothing if index_granularity_bytes=0)", 0) \ M(Bool, enable_mixed_granularity_parts, true, "Enable parts with adaptive and non adaptive granularity", 0) \ M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 2da20073427..806c861cf00 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -47,8 +47,12 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( if (!primary_key.column_names.empty()) first_primary_key_column = primary_key.column_names[0]; - for (const auto & [_, size] : column_sizes) - total_size_of_queried_columns += size; + for (const auto & name : queried_columns) + { + auto it = column_sizes.find(name); + if (it != column_sizes.end()) + total_size_of_queried_columns += it->second; + } determineArrayJoinedNames(query_info.query->as()); optimize(query_info.query->as()); diff --git 
a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 1fcd28b70e3..d8fb50a866c 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( , name(name_) , path(storage.getRelativeDataPath() + name_) , pool(storage.getContext()->getSchedulePool()) + , log(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)")) { init(); sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] @@ -172,8 +174,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor || e.code() == ErrorCodes::BAD_DATA_PART_NAME || e.code() == ErrorCodes::CORRUPTED_DATA) { - LOG_WARNING(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)"), - "WAL file '{}' is broken. {}", path, e.displayText()); + LOG_WARNING(log, "WAL file '{}' is broken. {}", path, e.displayText()); /// If file is broken, do not write new parts to it. /// But if it contains any part rotate and save them. @@ -203,6 +204,15 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor part_out.writePrefix(); part_out.write(block); + + for (const auto & projection : metadata_snapshot->getProjections()) + { + auto projection_block = projection.calculate(block, context); + if (projection_block.rows()) + part->addProjectionPart( + projection.name, + MergeTreeDataWriter::writeInMemoryProjectionPart(storage, log, projection_block, projection, part.get())); + } part_out.writeSuffixAndFinalizePart(part); min_block_number = std::min(min_block_number, part->info.min_block); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index e01911aa8b8..8d1ea3c332e 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -91,6 +91,8 @@ private: bool sync_scheduled = false; mutable std::mutex write_mutex; + + Poco::Logger * log; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index d04df598218..4c36508ebf5 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -34,6 +34,7 @@ public: void writeSuffix() override; /// Finalize writing part and fill inner structures + /// If part is new and contains projections, they should be added before invoking this method. void writeSuffixAndFinalizePart( MergeTreeData::MutableDataPartPtr & new_part, bool sync = false, diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 4c187109ac6..c227febbbc2 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -194,7 +194,7 @@ void PartMovesBetweenShardsOrchestrator::stepEntry(const Entry & entry, zkutil:: /// This wait in background schedule pool is useless. It'd be /// better to have some notification which will call `step` /// function when all replicas will finish. TODO.
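// ---------------------------------------------------------------------------
// [editor's note] Sketch of why the MergeTreeWhereOptimizer hunk above matters
// (hypothetical numbers and helper; the real heuristic is more involved).
// total_size_of_queried_columns feeds the ratio that decides whether moving a
// condition to PREWHERE is profitable; summing *all* table columns instead of
// only the queried ones inflated the denominator and made almost any condition
// look cheap to move.
#include <cstdint>
#include <map>
#include <string>
#include <vector>

uint64_t totalSizeOfQueriedColumns(
    const std::vector<std::string> & queried_columns,
    const std::map<std::string, uint64_t> & column_sizes)
{
    uint64_t total = 0;
    for (const auto & name : queried_columns)
    {
        auto it = column_sizes.find(name);
        if (it != column_sizes.end()) // a queried column may have no size recorded yet
            total += it->second;
    }
    return total;
}

// A condition reading `condition_columns_size` bytes is a good PREWHERE candidate
// roughly when it is small relative to everything the query reads.
bool worthMovingToPrewhere(uint64_t condition_columns_size, uint64_t total_size_of_queried_columns)
{
    return condition_columns_size * 10 <= total_size_of_queried_columns; // illustrative threshold
}
// ---------------------------------------------------------------------------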
- storage.waitForAllReplicasToProcessLogEntry(log_entry, true); + storage.waitForAllReplicasToProcessLogEntry(zookeeper_path, log_entry, -1); } { @@ -231,7 +231,7 @@ void PartMovesBetweenShardsOrchestrator::stepEntry(const Entry & entry, zkutil:: String log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*responses.back()).path_created; log_entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - storage.waitForAllTableReplicasToProcessLogEntry(entry.to_shard, log_entry, true); + storage.waitForAllReplicasToProcessLogEntry(entry.to_shard, log_entry, -1); } { @@ -269,7 +269,7 @@ void PartMovesBetweenShardsOrchestrator::stepEntry(const Entry & entry, zkutil:: String log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*responses.back()).path_created; log_entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - storage.waitForAllTableReplicasToProcessLogEntry(entry.to_shard, log_entry, true); + storage.waitForAllReplicasToProcessLogEntry(entry.to_shard, log_entry, -1); } { @@ -318,7 +318,7 @@ void PartMovesBetweenShardsOrchestrator::stepEntry(const Entry & entry, zkutil:: String log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*responses.back()).path_created; log_entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - storage.waitForAllTableReplicasToProcessLogEntry(entry.to_shard, log_entry, true); + storage.waitForAllReplicasToProcessLogEntry(entry.to_shard, log_entry, -1); } { @@ -348,7 +348,7 @@ void PartMovesBetweenShardsOrchestrator::stepEntry(const Entry & entry, zkutil:: { ReplicatedMergeTreeLogEntry log_entry; if (storage.dropPartImpl(zk, entry.part_name, log_entry, false, false)) - storage.waitForAllReplicasToProcessLogEntry(log_entry, true); + storage.waitForAllReplicasToProcessLogEntry(zookeeper_path, log_entry, -1); } { diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index bcce2d990ca..997e6e8bb74 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -73,6 +73,11 @@ public: return storage.getPartitionIDFromQuery(ast, context); } + bool materializeTTLRecalculateOnly() const + { + return parts.front()->storage.getSettings()->materialize_ttl_recalculate_only; + } + protected: /// Used in part mutation. StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index dd48b23ecc3..5fc44bc044f 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB { @@ -23,6 +24,7 @@ namespace ErrorCodes extern const int NO_SUCH_PROJECTION_IN_TABLE; extern const int ILLEGAL_PROJECTION; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; }; const char * ProjectionDescription::typeToString(Type type) @@ -192,6 +194,28 @@ void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & *this = getProjectionFromAST(definition_ast, new_columns, query_context); } + +Block ProjectionDescription::calculate(const Block & block, ContextPtr context) const +{ + auto in = InterpreterSelectQuery( + query_ast, + context, + Pipe(std::make_shared<SourceFromSingleChunk>(block, Chunk(block.getColumns(), block.rows()))), + SelectQueryOptions{ + type == ProjectionDescription::Type::Normal ?
QueryProcessingStage::FetchColumns + : QueryProcessingStage::WithMergeableState}) + .execute() + .getInputStream(); + in = std::make_shared<SquashingBlockInputStream>(in, block.rows(), 0); + in->readPrefix(); + auto ret = in->read(); + if (in->read()) + throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR); + in->readSuffix(); + return ret; +} + + String ProjectionsDescription::toString() const { if (empty()) diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index fd505c4fe06..2b279c711fe 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -85,6 +85,8 @@ struct ProjectionDescription void recalculateWithNewColumns(const ColumnsDescription & new_columns, ContextPtr query_context); bool isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + + Block calculate(const Block & block, ContextPtr context) const; }; /// All projections in storage diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index a4536e1ff58..a2db6655223 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -163,6 +163,7 @@ struct SelectQueryInfo std::optional projection; bool ignore_projections = false; bool is_projection_query = false; + bool merge_tree_empty_result = false; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; }; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 5183b925141..cbd27afe106 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -214,7 +215,7 @@ bool StorageInMemoryMetadata::hasAnyGroupByTTL() const return !table_ttl.group_by_ttl.empty(); } -ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const +ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns, bool include_ttl_target) const { if (updated_columns.empty()) return {}; @@ -250,7 +251,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet if (hasRowsTTL()) { auto rows_expression = getRowsTTL().expression; - if (add_dependent_columns(rows_expression, required_ttl_columns)) + if (add_dependent_columns(rows_expression, required_ttl_columns) && include_ttl_target) { /// Filter all columns, if rows TTL expression have to be recalculated. for (const auto & column : getColumns().getAllPhysical()) @@ -263,13 +264,15 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet for (const auto & [name, entry] : getColumnTTLs()) { - if (add_dependent_columns(entry.expression, required_ttl_columns)) + if (add_dependent_columns(entry.expression, required_ttl_columns) && include_ttl_target) updated_ttl_columns.insert(name); } for (const auto & entry : getMoveTTLs()) add_dependent_columns(entry.expression, required_ttl_columns); + // TODO: what about rows_where_ttl and group_by_ttl? + for (const auto & column : indices_columns) res.emplace(column, ColumnDependency::SKIP_INDEX); for (const auto & column : projections_columns) @@ -493,6 +496,23 @@ namespace return res; } + + /* + * This function checks compatibility of enums. It returns true if: + * 1. Both types are enums. + * 2. The first type can represent all possible values of the second one. + * 3. Both types require the same amount of memory.
+ */ + bool isCompatibleEnumTypes(const IDataType * lhs, const IDataType * rhs) + { + if (IDataTypeEnum const * enum_type = dynamic_cast<IDataTypeEnum const *>(lhs)) + { + if (!enum_type->contains(*rhs)) + return false; + return enum_type->getMaximumSizeOfValueInMemory() == rhs->getMaximumSizeOfValueInMemory(); + } + return false; + } } void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const @@ -544,12 +564,13 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) column.name, listOfColumns(available_columns)); - if (!column.type->equals(*it->getMapped())) + const auto * available_type = it->getMapped(); + if (!column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( ErrorCodes::TYPE_MISMATCH, "Type mismatch for column {}. Column has type {}, got type {}", column.name, - it->getMapped()->getName(), + available_type->getName(), column.type->getName()); if (unique_names.end() != unique_names.find(column.name)) @@ -588,16 +609,16 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, name, listOfColumns(available_columns)); - const auto & provided_column_type = *it->getMapped(); - const auto & available_column_type = *jt->getMapped(); + const auto * provided_column_type = it->getMapped(); + const auto * available_column_type = jt->getMapped(); - if (!provided_column_type.equals(available_column_type)) + if (!provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( ErrorCodes::TYPE_MISMATCH, "Type mismatch for column {}. Column has type {}, got type {}", name, - provided_column_type.getName(), - available_column_type.getName()); + available_column_type->getName(), + provided_column_type->getName()); if (unique_names.end() != unique_names.find(name)) throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE, @@ -632,12 +653,13 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const column.name, listOfColumns(available_columns)); - if (!column.type->equals(*it->getMapped())) + const auto * available_type = it->getMapped(); + if (!column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( ErrorCodes::TYPE_MISMATCH, "Type mismatch for column {}. Column has type {}, got type {}", column.name, - it->getMapped()->getName(), + available_type->getName(), column.type->getName()); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index d0d60f608d7..9accdb9b3b6 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -143,7 +143,7 @@ struct StorageInMemoryMetadata /// Returns columns, which will be needed to calculate dependencies (skip /// indices, TTL expressions) if we update @updated_columns set of columns. - ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; + ColumnDependencies getColumnDependencies(const NameSet & updated_columns, bool include_ttl_target) const; /// Block with ordinary + materialized columns.
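// ---------------------------------------------------------------------------
// [editor's note] A self-contained model (not the ClickHouse types) of what
// isCompatibleEnumTypes() above accepts: the stored (available) type must be an
// enum that contains every value of the provided enum, and both must occupy the
// same number of bytes, e.g. Enum8('a' = 1, 'b' = 2) accepts Enum8('a' = 1) but
// rejects Enum16('a' = 1) and Enum8('c' = 3).
#include <cstddef>
#include <map>
#include <string>

struct EnumModel
{
    std::map<std::string, int> values; // name -> underlying value
    size_t value_size;                 // 1 for Enum8, 2 for Enum16

    bool contains(const EnumModel & rhs) const
    {
        for (const auto & [name, value] : rhs.values)
        {
            auto it = values.find(name);
            if (it == values.end() || it->second != value)
                return false;
        }
        return true;
    }
};

bool isCompatibleEnums(const EnumModel & available, const EnumModel & provided)
{
    return available.contains(provided) && available.value_size == provided.value_size;
}
// ---------------------------------------------------------------------------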
Block getSampleBlock() const; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 79bb1f59cc7..ee5bd1eb03d 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -271,7 +271,9 @@ void registerStorageMySQL(StorageFactory & factory) username, password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, mysql_settings.connection_pool_size, - mysql_settings.connection_max_tries); + mysql_settings.connection_max_tries, + args.getContext()->getSettingsRef().external_storage_connect_timeout, + args.getContext()->getSettingsRef().external_storage_rw_timeout); bool replace_query = false; std::string on_duplicate_clause; diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 603a52b2801..3617e964734 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -29,6 +29,8 @@ #include #include #include +#include +#include namespace DB @@ -47,10 +49,12 @@ StoragePostgreSQL::StoragePostgreSQL( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - const String & remote_table_schema_) + const String & remote_table_schema_, + const String & on_conflict_) : IStorage(table_id_) , remote_table_name(remote_table_name_) , remote_table_schema(remote_table_schema_) + , on_conflict(on_conflict_) , pool(std::move(pool_)) { StorageInMemoryMetadata storage_metadata; @@ -94,17 +98,22 @@ Pipe StoragePostgreSQL::read( class PostgreSQLSink : public SinkToStorage { + +using Row = std::vector>; + public: explicit PostgreSQLSink( const StorageMetadataPtr & metadata_snapshot_, postgres::ConnectionHolderPtr connection_holder_, const String & remote_table_name_, - const String & remote_table_schema_) + const String & remote_table_schema_, + const String & on_conflict_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) , metadata_snapshot(metadata_snapshot_) , connection_holder(std::move(connection_holder_)) , remote_table_name(remote_table_name_) , remote_table_schema(remote_table_schema_) + , on_conflict(on_conflict_) { } @@ -113,11 +122,21 @@ public: void consume(Chunk chunk) override { auto block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); + if (!inserter) - inserter = std::make_unique(connection_holder->get(), - remote_table_schema.empty() ? pqxx::table_path({remote_table_name}) - : pqxx::table_path({remote_table_schema, remote_table_name}), - block.getNames()); + { + if (on_conflict.empty()) + { + inserter = std::make_unique(connection_holder->get(), + remote_table_schema.empty() ? 
pqxx::table_path({remote_table_name}) + : pqxx::table_path({remote_table_schema, remote_table_name}), block.getNames()); + } + else + { + inserter = std::make_unique(connection_holder->get(), remote_table_name, + remote_table_schema, block.getColumnsWithTypeAndName(), on_conflict); + } + } const auto columns = block.getColumns(); const size_t num_rows = block.rows(), num_cols = block.columns(); @@ -151,7 +170,7 @@ public: } } - inserter->stream.write_values(row); + inserter->insert(row); } } @@ -268,37 +287,92 @@ public: } private: - struct StreamTo + struct Inserter { + pqxx::connection & connection; pqxx::work tx; + + explicit Inserter(pqxx::connection & connection_) + : connection(connection_) + , tx(connection) {} + + virtual ~Inserter() = default; + + virtual void insert(const Row & row) = 0; + virtual void complete() = 0; + }; + + struct StreamTo : Inserter + { Names columns; pqxx::stream_to stream; - StreamTo(pqxx::connection & connection, pqxx::table_path table_, Names columns_) - : tx(connection) + StreamTo(pqxx::connection & connection_, pqxx::table_path table_, Names columns_) + : Inserter(connection_) , columns(std::move(columns_)) , stream(pqxx::stream_to::raw_table(tx, connection.quote_table(table_), connection.quote_columns(columns))) { } - void complete() + void complete() override { stream.complete(); tx.commit(); } + + void insert(const Row & row) override + { + stream.write_values(row); + } + }; + + struct PreparedInsert : Inserter + { + PreparedInsert(pqxx::connection & connection_, const String & table, const String & schema, + const ColumnsWithTypeAndName & columns, const String & on_conflict_) + : Inserter(connection_) + { + WriteBufferFromOwnString buf; + buf << getInsertQuery(schema, table, columns, IdentifierQuotingStyle::DoubleQuotes); + buf << " ("; + for (size_t i = 1; i <= columns.size(); ++i) + { + if (i > 1) + buf << ", "; + buf << "$" << i; + } + buf << ") "; + buf << on_conflict_; + connection.prepare("insert", buf.str()); + } + + void complete() override + { + connection.unprepare("insert"); + tx.commit(); + } + + void insert(const Row & row) override + { + pqxx::params params; + params.reserve(row.size()); + params.append_multi(row); + tx.exec_prepared("insert", params); + } }; StorageMetadataPtr metadata_snapshot; postgres::ConnectionHolderPtr connection_holder; - const String remote_table_name, remote_table_schema; - std::unique_ptr inserter; + const String remote_db_name, remote_table_name, remote_table_schema, on_conflict; + + std::unique_ptr inserter; }; SinkToStoragePtr StoragePostgreSQL::write( const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /* context */) { - return std::make_shared(metadata_snapshot, pool->get(), remote_table_name, remote_table_schema); + return std::make_shared(metadata_snapshot, pool->get(), remote_table_name, remote_table_schema, on_conflict); } @@ -308,9 +382,9 @@ void registerStoragePostgreSQL(StorageFactory & factory) { ASTs & engine_args = args.engine_args; - if (engine_args.size() < 5 || engine_args.size() > 6) - throw Exception("Storage PostgreSQL requires from 5 to 6 parameters: " - "PostgreSQL('host:port', 'database', 'table', 'username', 'password' [, 'schema']", + if (engine_args.size() < 5 || engine_args.size() > 7) + throw Exception("Storage PostgreSQL requires from 5 to 7 parameters: " + "PostgreSQL('host:port', 'database', 'table', 'username', 'password' [, 'schema', 'ON CONFLICT ...']", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & engine_arg : engine_args) @@ 
-326,9 +400,11 @@ void registerStoragePostgreSQL(StorageFactory & factory) const String & username = engine_args[3]->as<ASTLiteral &>().value.safeGet<String>(); const String & password = engine_args[4]->as<ASTLiteral &>().value.safeGet<String>(); - String remote_table_schema; - if (engine_args.size() == 6) + String remote_table_schema, on_conflict; + if (engine_args.size() >= 6) remote_table_schema = engine_args[5]->as<ASTLiteral &>().value.safeGet<String>(); + if (engine_args.size() >= 7) + on_conflict = engine_args[6]->as<ASTLiteral &>().value.safeGet<String>(); auto pool = std::make_shared<postgres::PoolWithFailover>( remote_database, @@ -345,7 +421,8 @@ void registerStoragePostgreSQL(StorageFactory & factory) args.columns, args.constraints, args.comment, - remote_table_schema); + remote_table_schema, + on_conflict); }, { .source_access_type = AccessType::POSTGRES, diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index bd5cd317c3d..a12b52e6e48 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -27,7 +27,8 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, - const std::string & remote_table_schema_ = ""); + const String & remote_table_schema_ = "", + const String & on_conflict = ""); String getName() const override { return "PostgreSQL"; } @@ -47,6 +48,7 @@ private: String remote_table_name; String remote_table_schema; + String on_conflict; postgres::PoolWithFailoverPtr pool; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f76f0881438..d24ca0f3161 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4732,12 +4732,10 @@ bool StorageReplicatedMergeTree::optimize( } } - if (query_context->getSettingsRef().replication_alter_partitions_sync != 0) - { - /// NOTE Table lock must not be held while waiting. Some combination of R-W-R locks from different threads will yield to deadlock. - for (auto & merge_entry : merge_entries) - waitForAllReplicasToProcessLogEntry(merge_entry, false); - } + table_lock.reset(); + + for (auto & merge_entry : merge_entries) + waitForLogEntryToBeProcessedIfNecessary(merge_entry, query_context); return true; } @@ -5048,20 +5046,8 @@ void StorageReplicatedMergeTree::alter( table_lock_holder.reset(); - std::vector<String> unwaited; - if (query_context->getSettingsRef().replication_alter_partitions_sync == 2) - { - LOG_DEBUG(log, "Updated shared metadata nodes in ZooKeeper. Waiting for replicas to apply changes."); - unwaited = waitForAllReplicasToProcessLogEntry(*alter_entry, false); - } - else if (query_context->getSettingsRef().replication_alter_partitions_sync == 1) - { - LOG_DEBUG(log, "Updated shared metadata nodes in ZooKeeper. Waiting for replicas to apply changes."); - waitForReplicaToProcessLogEntry(replica_name, *alter_entry); - } - - if (!unwaited.empty()) - throw Exception("Some replicas doesn't finish metadata alter", ErrorCodes::UNFINISHED); + LOG_DEBUG(log, "Updated shared metadata nodes in ZooKeeper. Waiting for replicas to apply changes."); + waitForLogEntryToBeProcessedIfNecessary(*alter_entry, query_context, "Some replicas didn't finish metadata alter: "); if (mutation_znode) { @@ -5212,11 +5198,7 @@ void StorageReplicatedMergeTree::dropPart(const String & part_name, bool detach, dropPartImpl(zookeeper, part_name, entry, detach, /*throw_if_noop=*/ true); - /// If necessary, wait until the operation is performed on itself or on all replicas.
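// ---------------------------------------------------------------------------
// [editor's note] Back to the StoragePostgreSQL hunks above: a sketch of the
// statement text that PreparedInsert assembles (the getInsertQuery() prefix is
// an assumption here). With columns (id, value) and the engine argument
// 'ON CONFLICT (id) DO NOTHING', the prepared statement becomes:
//   INSERT INTO "schema"."table" ("id", "value") VALUES ($1, $2) ON CONFLICT (id) DO NOTHING
#include <cstddef>
#include <sstream>
#include <string>

std::string buildPreparedInsert(
    const std::string & insert_prefix, // e.g. INSERT INTO "schema"."table" ("id", "value") VALUES
    size_t num_columns,
    const std::string & on_conflict)   // may be empty
{
    std::ostringstream buf;
    buf << insert_prefix << " (";
    for (size_t i = 1; i <= num_columns; ++i) // PostgreSQL placeholders are 1-based
    {
        if (i > 1)
            buf << ", ";
        buf << "$" << i;
    }
    buf << ") " << on_conflict;
    return buf.str();
}
// ---------------------------------------------------------------------------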
- if (query_context->getSettingsRef().replication_alter_partitions_sync == 1) - waitForReplicaToProcessLogEntry(replica_name, entry); - else if (query_context->getSettingsRef().replication_alter_partitions_sync == 2) - waitForAllReplicasToProcessLogEntry(entry); + waitForLogEntryToBeProcessedIfNecessary(entry, query_context); } void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool detach, ContextPtr query_context) @@ -5233,12 +5215,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de if (did_drop) { - /// If necessary, wait until the operation is performed on itself or on all replicas. - if (query_context->getSettingsRef().replication_alter_partitions_sync == 1) - waitForReplicaToProcessLogEntry(replica_name, entry); - else if (query_context->getSettingsRef().replication_alter_partitions_sync == 2) - waitForAllReplicasToProcessLogEntry(entry); - + waitForLogEntryToBeProcessedIfNecessary(entry, query_context); cleanLastPartNode(partition_id); } } @@ -5257,13 +5234,17 @@ void StorageReplicatedMergeTree::truncate( Strings partitions = zookeeper->getChildren(fs::path(zookeeper_path) / "block_numbers"); + std::vector> entries_to_wait; + entries_to_wait.reserve(partitions.size()); for (String & partition_id : partitions) { - LogEntry entry; - - if (dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, false)) - waitForAllReplicasToProcessLogEntry(entry); + auto entry = std::make_unique(); + if (dropAllPartsInPartition(*zookeeper, partition_id, *entry, query_context, false)) + entries_to_wait.push_back(std::move(entry)); } + + for (const auto & entry : entries_to_wait) + waitForLogEntryToBeProcessedIfNecessary(*entry, query_context); } @@ -5421,19 +5402,20 @@ StorageReplicatedMergeTree::allocateBlockNumber( } -Strings StorageReplicatedMergeTree::waitForAllTableReplicasToProcessLogEntry( - const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) +Strings StorageReplicatedMergeTree::tryWaitForAllReplicasToProcessLogEntry( + const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, Int64 wait_for_inactive_timeout) { LOG_DEBUG(log, "Waiting for all replicas to process {}", entry.znode_name); auto zookeeper = getZooKeeper(); Strings replicas = zookeeper->getChildren(fs::path(table_zookeeper_path) / "replicas"); Strings unwaited; + bool wait_for_inactive = wait_for_inactive_timeout != 0; for (const String & replica : replicas) { - if (wait_for_non_active || zookeeper->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active")) + if (wait_for_inactive || zookeeper->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active")) { - if (!waitForTableReplicaToProcessLogEntry(table_zookeeper_path, replica, entry, wait_for_non_active)) + if (!tryWaitForReplicaToProcessLogEntry(table_zookeeper_path, replica, entry, wait_for_inactive_timeout)) unwaited.push_back(replica); } else @@ -5446,16 +5428,38 @@ Strings StorageReplicatedMergeTree::waitForAllTableReplicasToProcessLogEntry( return unwaited; } - -Strings StorageReplicatedMergeTree::waitForAllReplicasToProcessLogEntry( - const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) +void StorageReplicatedMergeTree::waitForAllReplicasToProcessLogEntry( + const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, Int64 wait_for_inactive_timeout, const String & error_context) { - return waitForAllTableReplicasToProcessLogEntry(zookeeper_path, 
entry, wait_for_non_active); + Strings unfinished_replicas = tryWaitForAllReplicasToProcessLogEntry(table_zookeeper_path, entry, wait_for_inactive_timeout); + if (unfinished_replicas.empty()) + return; + + throw Exception(ErrorCodes::UNFINISHED, "{}Timeout exceeded while waiting for replicas {} to process entry {}. " + "Probably some replicas are inactive", error_context, fmt::join(unfinished_replicas, ", "), entry.znode_name); } +void StorageReplicatedMergeTree::waitForLogEntryToBeProcessedIfNecessary(const ReplicatedMergeTreeLogEntryData & entry, ContextPtr query_context, const String & error_context) +{ + /// If necessary, wait until the operation is performed on itself or on all replicas. + Int64 wait_for_inactive_timeout = query_context->getSettingsRef().replication_wait_for_inactive_replica_timeout; + if (query_context->getSettingsRef().replication_alter_partitions_sync == 1) + { + bool finished = tryWaitForReplicaToProcessLogEntry(zookeeper_path, replica_name, entry, wait_for_inactive_timeout); + if (!finished) + { + throw Exception(ErrorCodes::UNFINISHED, "{}Log entry {} is not processed on local replica, " + "most likely because the replica was shut down.", error_context, entry.znode_name); + } + } + else if (query_context->getSettingsRef().replication_alter_partitions_sync == 2) + { + waitForAllReplicasToProcessLogEntry(zookeeper_path, entry, wait_for_inactive_timeout, error_context); + } +} -bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( - const String & table_zookeeper_path, const String & replica, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) +bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( + const String & table_zookeeper_path, const String & replica, const ReplicatedMergeTreeLogEntryData & entry, Int64 wait_for_inactive_timeout) { String entry_str = entry.toString(); String log_node_name; @@ -5473,18 +5477,27 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( */ bool waiting_itself = replica == replica_name; + /// Do not wait if timeout is zero + bool wait_for_inactive = wait_for_inactive_timeout != 0; + /// Wait for unlimited time if timeout is negative + bool check_timeout = wait_for_inactive_timeout > 0; + Stopwatch time_waiting; const auto & stop_waiting = [&]() { bool stop_waiting_itself = waiting_itself && partial_shutdown_called; - bool stop_waiting_non_active = !wait_for_non_active && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); - return is_dropped || stop_waiting_itself || stop_waiting_non_active; + bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds(); + bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded) + && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); + return is_dropped || stop_waiting_itself || stop_waiting_inactive; }; /// Don't recheck ZooKeeper too often constexpr auto event_wait_timeout_ms = 3000; - if (startsWith(entry.znode_name, "log-")) + if (!startsWith(entry.znode_name, "log-")) + throw Exception("Logical error: unexpected name of log node: " + entry.znode_name, ErrorCodes::LOGICAL_ERROR); + { /// Take the number from the node name `log-xxxxxxxxxx`.
UInt64 log_index = parse<UInt64>(entry.znode_name.substr(entry.znode_name.size() - 10)); @@ -5493,13 +5506,17 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( LOG_DEBUG(log, "Waiting for {} to pull {} to queue", replica, log_node_name); /// Let's wait until entry gets into the replica queue. + bool pulled_to_queue = false; while (!stop_waiting()) { zkutil::EventPtr event = std::make_shared<Poco::Event>(); String log_pointer = getZooKeeper()->get(fs::path(table_zookeeper_path) / "replicas" / replica / "log_pointer", nullptr, event); if (!log_pointer.empty() && parse<UInt64>(log_pointer) > log_index) + { + pulled_to_queue = true; break; + } /// Wait with timeout because we can be already shut down, but not dropped. /// So log_pointer node will exist, but we will never update it because all background threads already stopped. @@ -5507,9 +5524,10 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( /// but the query will never finish because the drop already shut down the table. event->tryWait(event_wait_timeout_ms); } + + if (!pulled_to_queue) + return false; } - else - throw Exception("Logical error: unexpected name of log node: " + entry.znode_name, ErrorCodes::LOGICAL_ERROR); LOG_DEBUG(log, "Looking for node corresponding to {} in {} queue", log_node_name, replica); @@ -5547,13 +5565,6 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( } -bool StorageReplicatedMergeTree::waitForReplicaToProcessLogEntry( - const String & replica, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) -{ - return waitForTableReplicaToProcessLogEntry(zookeeper_path, replica, entry, wait_for_non_active); -} - - void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) { auto zookeeper = tryGetZooKeeper(); @@ -6562,13 +6573,10 @@ void StorageReplicatedMergeTree::replacePartitionFrom( parts_to_remove.clear(); cleanup_thread.wakeup(); - /// If necessary, wait until the operation is performed on all replicas. - if (query_context->getSettingsRef().replication_alter_partitions_sync > 1) - { - lock2.reset(); - lock1.reset(); - waitForAllReplicasToProcessLogEntry(entry); - } + lock2.reset(); + lock1.reset(); + + waitForLogEntryToBeProcessedIfNecessary(entry, query_context); } void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr query_context) @@ -6767,12 +6775,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta parts_to_remove.clear(); cleanup_thread.wakeup(); + lock2.reset(); - if (query_context->getSettingsRef().replication_alter_partitions_sync > 1) - { - lock2.reset(); - dest_table_storage->waitForAllReplicasToProcessLogEntry(entry); - } + dest_table_storage->waitForLogEntryToBeProcessedIfNecessary(entry, query_context); /// Create DROP_RANGE for the source table Coordination::Requests ops_src; @@ -6787,11 +6792,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*op_results.front()).path_created; entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - if (query_context->getSettingsRef().replication_alter_partitions_sync > 1) - { - lock1.reset(); - waitForAllReplicasToProcessLogEntry(entry_delete); - } + lock1.reset(); + waitForLogEntryToBeProcessedIfNecessary(entry_delete, query_context);
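Note on the two hunks above: replacePartitionFrom and movePartitionToTable back the REPLACE PARTITION ... FROM and MOVE PARTITION ... TO TABLE statements; after this change both release their table locks first and then go through waitForLogEntryToBeProcessedIfNecessary(), so they honor replication_alter_partitions_sync the same way DROP PARTITION does. A minimal SQL illustration (table and partition names are made up):

ALTER TABLE dst REPLACE PARTITION 202108 FROM src;
ALTER TABLE src MOVE PARTITION 202108 TO TABLE dst;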
/// Cleaning possibly stored information about parts from /quorum/last_part node in ZooKeeper. cleanLastPartNode(partition_id); @@ -7540,6 +7542,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP out.writePrefix(); out.write(block); + /// TODO(ab): What projections should we add to the empty part? How can we make sure that it + /// won't block future merges? Perhaps we should also check part emptiness when selecting parts + /// to merge. out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); try diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 2fd10f88895..73c39bed3e3 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -635,22 +635,27 @@ private: const String & zookeeper_block_id_path = "", const String & zookeeper_path_prefix = "") const; /** Wait until all replicas, including this, execute the specified action from the log. - * If replicas are added at the same time, it can not wait the added replica . + * If replicas are added at the same time, it cannot wait for the added replica. + * + * Waits no longer than wait_for_inactive_timeout for inactive replicas. + * Returns the list of inactive replicas that have not executed the entry, or throws an exception. * * NOTE: This method must be called without table lock held. * Because it effectively waits for other thread that usually has to also acquire a lock to proceed and this yields deadlock. - * TODO: There are wrong usages of this method that are not fixed yet. - * - * One method for convenient use on current table, another for waiting on foreign shards. */ - Strings waitForAllTableReplicasToProcessLogEntry(const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true); - Strings waitForAllReplicasToProcessLogEntry(const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true); + void waitForAllReplicasToProcessLogEntry(const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, + Int64 wait_for_inactive_timeout, const String & error_context = {}); + Strings tryWaitForAllReplicasToProcessLogEntry(const String & table_zookeeper_path, const ReplicatedMergeTreeLogEntryData & entry, + Int64 wait_for_inactive_timeout); /** Wait until the specified replica executes the specified action from the log. * NOTE: See comment about locks above. */ - bool waitForTableReplicaToProcessLogEntry(const String & table_zookeeper_path, const String & replica_name, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true); - bool waitForReplicaToProcessLogEntry(const String & replica_name, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true); + bool tryWaitForReplicaToProcessLogEntry(const String & table_zookeeper_path, const String & replica_name, + const ReplicatedMergeTreeLogEntryData & entry, Int64 wait_for_inactive_timeout = 0); + + /// Depending on settings, either does nothing or waits until this replica (or all replicas) has processed the log entry. + void waitForLogEntryToBeProcessedIfNecessary(const ReplicatedMergeTreeLogEntryData & entry, ContextPtr query_context, const String & error_context = {});
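The declaration above is the single entry point the ALTER/DROP/TRUNCATE paths now share. A sketch of how a query drives it (the table name is hypothetical; per the implementation above, a zero timeout skips inactive replicas and a negative one waits without limit):

SET replication_alter_partitions_sync = 2;                -- 1 = wait for own replica, 2 = wait for all replicas
SET replication_wait_for_inactive_replica_timeout = 120;  -- give inactive replicas at most 120 seconds
ALTER TABLE test_table DROP PARTITION '2021-08-01';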
/// Throw an exception if the table is readonly. void assertNotReadonly() const; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 973bf493cd1..f19cffee37c 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -1,26 +1,38 @@ #include #include #include +#include #include #include #include +#include #include - namespace DB { namespace { template <typename Factory> - void fillRow(MutableColumns & res_columns, const String & name, UInt64 is_aggregate, const Factory & f) + void fillRow(MutableColumns & res_columns, const String & name, UInt64 is_aggregate, const String & create_query, const Factory & f) { res_columns[0]->insert(name); res_columns[1]->insert(is_aggregate); - res_columns[2]->insert(f.isCaseInsensitive(name)); - if (f.isAlias(name)) - res_columns[3]->insert(f.aliasTo(name)); - else + + if constexpr (std::is_same_v<Factory, UserDefinedFunctionFactory>) + { + res_columns[2]->insert(false); res_columns[3]->insertDefault(); + } + else + { + res_columns[2]->insert(f.isCaseInsensitive(name)); + if (f.isAlias(name)) + res_columns[3]->insert(f.aliasTo(name)); + else + res_columns[3]->insertDefault(); + } + + res_columns[4]->insert(create_query); } } @@ -31,6 +43,7 @@ NamesAndTypesList StorageSystemFunctions::getNamesAndTypes() {"is_aggregate", std::make_shared<DataTypeUInt8>()}, {"case_insensitive", std::make_shared<DataTypeUInt8>()}, {"alias_to", std::make_shared<DataTypeString>()}, + {"create_query", std::make_shared<DataTypeString>()} }; } @@ -40,14 +53,22 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr, const auto & function_names = functions_factory.getAllRegisteredNames(); for (const auto & function_name : function_names) { - fillRow(res_columns, function_name, UInt64(0), functions_factory); + fillRow(res_columns, function_name, UInt64(0), "", functions_factory); } const auto & aggregate_functions_factory = AggregateFunctionFactory::instance(); const auto & aggregate_function_names = aggregate_functions_factory.getAllRegisteredNames(); for (const auto & function_name : aggregate_function_names) { - fillRow(res_columns, function_name, UInt64(1), aggregate_functions_factory); + fillRow(res_columns, function_name, UInt64(1), "", aggregate_functions_factory); + } + + const auto & user_defined_functions_factory = UserDefinedFunctionFactory::instance(); + const auto & user_defined_functions_names = user_defined_functions_factory.getAllRegisteredNames(); + for (const auto & function_name : user_defined_functions_names) + { + auto create_query = queryToString(user_defined_functions_factory.get(function_name)); + fillRow(res_columns, function_name, UInt64(0), create_query, user_defined_functions_factory); } } }
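For illustration, the create_query column added above can be inspected once a SQL user-defined function exists (the function mirrors the ones used in the tests below; the exact rendering of create_query is an assumption):

CREATE FUNCTION MySum AS (a, b) -> a + b;
SELECT name, is_aggregate, create_query FROM system.functions WHERE name = 'MySum';
-- expected: one row with is_aggregate = 0 and create_query holding the CREATE FUNCTION statement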
diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 6a643dbe1b9..dba05d44969 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -75,7 +75,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"rows_where_ttl_info.expression", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, {"rows_where_ttl_info.min", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}, - {"rows_where_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())} + {"rows_where_ttl_info.max", std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}, + + {"projections", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, } ) { @@ -253,6 +255,13 @@ void StorageSystemParts::processNextStorage( add_ttl_info_map(part->ttl_infos.group_by_ttl); add_ttl_info_map(part->ttl_infos.rows_where_ttl); + Array projections; + for (const auto & [name, _] : part->getProjectionParts()) + projections.push_back(name); + + if (columns_mask[src_index++]) + columns[res_index++]->insert(projections); + /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread if (has_state_column) diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index cb03ae751e3..103972a106f 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -22,4 +22,3 @@ target_link_libraries (transform_part_zk_nodes dbms string_utils ) - diff --git a/src/Storages/fuzzers/CMakeLists.txt b/src/Storages/fuzzers/CMakeLists.txt index 93d3d2926bd..d41e96868ad 100644 --- a/src/Storages/fuzzers/CMakeLists.txt +++ b/src/Storages/fuzzers/CMakeLists.txt @@ -1,11 +1,7 @@ -add_executable (mergetree_checksum_fuzzer - mergetree_checksum_fuzzer.cpp - "${ClickHouse_SOURCE_DIR}/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp" - "${ClickHouse_SOURCE_DIR}/src/Compression/CompressedReadBuffer.cpp" - "${ClickHouse_SOURCE_DIR}/src/Compression/CompressedWriteBuffer.cpp" -) -target_link_libraries (mergetree_checksum_fuzzer PRIVATE clickhouse_common_io fuzz_compression ${LIB_FUZZING_ENGINE}) +add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp) +# Look at comment around fuzz_compression target declaration +target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) add_executable (columns_description_fuzzer columns_description_fuzzer.cpp) target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index d701728479b..568cc6171fd 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -37,7 +37,8 @@ StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, columns, ConstraintsDescription{}, String{}, - remote_table_schema); + remote_table_schema, + on_conflict); result->startup(); return result; @@ -67,9 +68,9 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex ASTs & args = func_args.arguments->children; - if (args.size() < 5 || args.size() > 6) - throw Exception("Table function 'PostgreSQL' requires from 5 to 6 parameters: " - "PostgreSQL('host:port', 'database', 'table', 'user', 'password', [, 'schema']).", + if (args.size() < 5 || args.size() > 7) + throw Exception("Table function 'PostgreSQL' requires from 5 to 7 parameters: " + "PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, 'schema'[, 'ON CONFLICT ...']]).", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) @@ -82,8 +83,10 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex remote_table_name = args[2]->as<ASTLiteral &>().value.safeGet<String>(); - if (args.size() == 6) + if (args.size() >= 6) remote_table_schema = args[5]->as<ASTLiteral &>().value.safeGet<String>(); + if (args.size() >= 7) + on_conflict = args[6]->as<ASTLiteral &>().value.safeGet<String>(); connection_pool = std::make_shared<postgres::PoolWithFailover>( args[1]->as<ASTLiteral &>().value.safeGet<String>(), diff --git a/src/TableFunctions/TableFunctionPostgreSQL.h b/src/TableFunctions/TableFunctionPostgreSQL.h index c31d02fa955..e3810a0e391 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.h +++ b/src/TableFunctions/TableFunctionPostgreSQL.h @@ -28,7 +28,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String connection_str; - String remote_table_name, remote_table_schema; + String remote_table_name, remote_table_schema, on_conflict; postgres::PoolWithFailoverPtr connection_pool; };
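A usage sketch of the extended signature, mirroring the integration test further below (host, credentials, and table are placeholders; the empty sixth argument keeps the default schema):

INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING')
SELECT number, toString(number) FROM numbers(100);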
diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 0d833e5fbe6..f3a41ba6a25 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -634,6 +634,7 @@ def run_tests_array(all_tests_with_params): open(stdout_file).read().split('\n')[:100]) status += '\n' + status += "\nstdout:\n{}\n".format(stdout) status += 'Database: ' + testcase_args.testcase_database elif stderr: @@ -643,6 +644,7 @@ def run_tests_array(all_tests_with_params): status += print_test_time(total_time) status += " - having stderror:\n{}\n".format( '\n'.join(stderr.split('\n')[:100])) + status += "\nstdout:\n{}\n".format(stdout) status += 'Database: ' + testcase_args.testcase_database elif 'Exception' in stdout: failures += 1 diff --git a/tests/config/config.d/merge_tree.xml b/tests/config/config.d/merge_tree.xml new file mode 100644 index 00000000000..35af1fa65eb --- /dev/null +++ b/tests/config/config.d/merge_tree.xml @@ -0,0 +1,5 @@ + + + 8 + + diff --git a/tests/config/config.d/merge_tree_settings.xml b/tests/config/config.d/merge_tree_settings.xml new file mode 100644 index 00000000000..8c57dc3acfd --- /dev/null +++ b/tests/config/config.d/merge_tree_settings.xml @@ -0,0 +1,6 @@ + + + + 10 + + diff --git a/tests/config/config.d/top_level_domains_lists.xml b/tests/config/config.d/top_level_domains_lists.xml index 7b5e6a5638a..a10cbae1b43 100644 --- a/tests/config/config.d/top_level_domains_lists.xml +++ b/tests/config/config.d/top_level_domains_lists.xml @@ -1,5 +1,6 @@ <public_suffix_list>public_suffix_list.dat</public_suffix_list> +<no_new_line_list>no_new_line_list.dat</no_new_line_list> diff --git a/tests/config/install.sh b/tests/config/install.sh index 571dff34018..7dd2559e505 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -28,9 +28,11 @@ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/config/top_level_domains/no_new_line_list.dat b/tests/config/top_level_domains/no_new_line_list.dat new file mode 100644 index 00000000000..4d5f9756e55 --- /dev/null +++ b/tests/config/top_level_domains/no_new_line_list.dat @@ -0,0 +1 @@ +foo.bar \ No newline at end of file diff --git a/tests/config/users.d/timeouts.xml b/tests/config/users.d/timeouts.xml index 60b24cfdef8..7d58315bd94 100644 --- a/tests/config/users.d/timeouts.xml +++ b/tests/config/users.d/timeouts.xml @@ -6,6 +6,8 @@ 60 60000 + + 30 diff --git a/tests/integration/test_access_for_functions/__init__.py b/tests/integration/test_access_for_functions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_access_for_functions/test.py
b/tests/integration/test_access_for_functions/test.py new file mode 100644 index 00000000000..ebd0f6bd907 --- /dev/null +++ b/tests/integration/test_access_for_functions/test.py @@ -0,0 +1,39 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance') + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + +def test_access_rights_for_function(): + create_function_query = "CREATE FUNCTION MySum AS (a, b) -> a + b" + + instance.query("CREATE USER A") + instance.query("CREATE USER B") + assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A') + + instance.query("GRANT CREATE FUNCTION on *.* TO A") + + instance.query(create_function_query, user = 'A') + assert instance.query("SELECT MySum(1, 2)") == "3\n" + + assert "it's necessary to have grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user = 'B') + + instance.query("GRANT DROP FUNCTION ON *.* TO B") + instance.query("DROP FUNCTION MySum", user = 'B') + assert "Unknown function MySum" in instance.query_and_get_error("SELECT MySum(1, 2)") + + instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") + assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A') + + instance.query("DROP USER IF EXISTS A") + instance.query("DROP USER IF EXISTS B") diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 64f2f42d71e..f73b4671798 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -218,6 +218,10 @@ def test_watchers(started_cluster): print("Fake data", fake_data_watch_data) assert genuine_data_watch_data == fake_data_watch_data + + genuine_zk.create("/test_data_watches/child", b"a") + fake_zk.create("/test_data_watches/child", b"a") + genuine_children = None def genuine_child_callback(event): print("Genuine child watch called") @@ -233,16 +237,74 @@ def test_watchers(started_cluster): genuine_zk.get_children("/test_data_watches", watch=genuine_child_callback) fake_zk.get_children("/test_data_watches", watch=fake_child_callback) + print("Calling non related genuine child") + genuine_zk.set("/test_data_watches/child", b"q") + genuine_zk.set("/test_data_watches", b"q") + + print("Calling non related fake child") + fake_zk.set("/test_data_watches/child", b"q") + fake_zk.set("/test_data_watches", b"q") + + time.sleep(3) + + assert genuine_children == None + assert fake_children == None + print("Calling genuine child") - genuine_zk.create("/test_data_watches/child", b"b") + genuine_zk.create("/test_data_watches/child_new", b"b") print("Calling fake child") - fake_zk.create("/test_data_watches/child", b"b") + fake_zk.create("/test_data_watches/child_new", b"b") time.sleep(3) print("Genuine children", genuine_children) print("Fake children", fake_children) assert genuine_children == fake_children + + genuine_children_delete = None + def genuine_child_delete_callback(event): + print("Genuine child watch called") + nonlocal genuine_children_delete + genuine_children_delete = event + + fake_children_delete = None + def fake_child_delete_callback(event): + print("Fake child watch called") + nonlocal fake_children_delete + fake_children_delete = event + + genuine_child_delete
= None + def genuine_own_delete_callback(event): + print("Genuine child watch called") + nonlocal genuine_child_delete + genuine_child_delete = event + + fake_child_delete = None + def fake_own_delete_callback(event): + print("Fake child watch called") + nonlocal fake_child_delete + fake_child_delete = event + + genuine_zk.get_children("/test_data_watches", watch=genuine_child_delete_callback) + fake_zk.get_children("/test_data_watches", watch=fake_child_delete_callback) + genuine_zk.get_children("/test_data_watches/child", watch=genuine_own_delete_callback) + fake_zk.get_children("/test_data_watches/child", watch=fake_own_delete_callback) + + print("Calling genuine child delete") + genuine_zk.delete("/test_data_watches/child") + print("Calling fake child delete") + fake_zk.delete("/test_data_watches/child") + + time.sleep(3) + + print("Genuine children delete", genuine_children_delete) + print("Fake children delete", fake_children_delete) + assert genuine_children_delete == fake_children_delete + + print("Genuine child delete", genuine_child_delete) + print("Fake child delete", fake_child_delete) + assert genuine_child_delete == fake_child_delete + finally: for zk in [genuine_zk, fake_zk]: stop_zk(zk) diff --git a/tests/integration/test_mysql_database_engine/configs/users.xml b/tests/integration/test_mysql_database_engine/configs/users.xml new file mode 100644 index 00000000000..12c6d97d4eb --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + 3 + 3 + + + diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py old mode 100644 new mode 100755 index a093c2a0125..4a826213594 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -8,7 +8,7 @@ from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_mysql=True) +clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'], with_mysql=True) @pytest.fixture(scope="module") @@ -404,3 +404,26 @@ def test_mysql_types(started_cluster, case_name, mysql_type, expected_ch_type, m execute_query(clickhouse_node, "SELECT value FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", settings=clickhouse_query_settings) + + +def test_clickhouse_mysql_no_connection(started_cluster): + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query('CREATE TABLE `test_database`.`test_table` ( `i``d` int(11) NOT NULL, PRIMARY KEY (`i``d`)) ENGINE=InnoDB;') + + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test_database, 'root', 'clickhouse')") + clickhouse_node.query("INSERT INTO `test_database`.`test_table`(`i``d`) select number from numbers(10000)") + assert clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`").rstrip() == '10000' + + started_cluster.pause_container('mysql57'); + result = clickhouse_node.query_and_get_error("SELECT count() FROM `test_database`.`test_table`") + assert('Exception: Connections to all replicas failed' in result) + + 
started_cluster.unpause_container('mysql57'); + result = clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`") + assert(result.strip() == '10000') + + started_cluster.pause_container('mysql57'); + clickhouse_node.query("DROP DATABASE test_database") + assert 'test_database' not in clickhouse_node.query('SHOW DATABASES') + started_cluster.unpause_container('mysql57'); diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 28a76631c0f..bb0e284eac9 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -291,7 +291,7 @@ def test_postgres_distributed(started_cluster): node2.query('DROP TABLE test_shards') node2.query('DROP TABLE test_replicas') - + def test_datetime_with_timezone(started_cluster): cursor = started_cluster.postgres_conn.cursor() cursor.execute("DROP TABLE IF EXISTS test_timezone") @@ -328,6 +328,32 @@ def test_postgres_ndim(started_cluster): cursor.execute("DROP TABLE arr1, arr2") +def test_postgres_on_conflict(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + table = 'test_conflict' + cursor.execute(f'DROP TABLE IF EXISTS {table}') + cursor.execute(f'CREATE TABLE {table} (a integer PRIMARY KEY, b text, c integer)') + + node1.query(''' + CREATE TABLE test_conflict (a UInt32, b String, c Int32) + ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_conflict', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING'); + ''') + node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') + node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 4 from numbers(100)''') + + check1 = f"SELECT count() FROM {table}" + assert (node1.query(check1)).rstrip() == '100' + + table_func = f'''postgresql('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'postgres', '{table}', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING')''' + node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') + node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') + + check1 = f"SELECT count() FROM {table}" + assert (node1.query(check1)).rstrip() == '100' + + cursor.execute(f'DROP TABLE {table} ') + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_user_defined_object_persistence/__init__.py b/tests/integration/test_user_defined_object_persistence/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_user_defined_object_persistence/test.py b/tests/integration/test_user_defined_object_persistence/test.py new file mode 100644 index 00000000000..6993bc13615 --- /dev/null +++ b/tests/integration/test_user_defined_object_persistence/test.py @@ -0,0 +1,39 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', stay_alive=True) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_persistence(): + create_function_query1 = "CREATE FUNCTION MySum1 AS (a, b) -> a + b" + create_function_query2 = "CREATE FUNCTION MySum2 AS (a, b) -> MySum1(a, b) + b" + + 
instance.query(create_function_query1) + instance.query(create_function_query2) + + assert instance.query("SELECT MySum1(1,2)") == "3\n" + assert instance.query("SELECT MySum2(1,2)") == "5\n" + + instance.restart_clickhouse() + + assert instance.query("SELECT MySum1(1,2)") == "3\n" + assert instance.query("SELECT MySum2(1,2)") == "5\n" + + instance.query("DROP FUNCTION MySum2") + instance.query("DROP FUNCTION MySum1") + + instance.restart_clickhouse() + + assert "Unknown function MySum1" in instance.query_and_get_error("SELECT MySum1(1, 2)") + assert "Unknown function MySum2" in instance.query_and_get_error("SELECT MySum2(1, 2)") diff --git a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj index 30c2c0eaf4f..745d88e97f7 100644 --- a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj +++ b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj @@ -68,6 +68,7 @@ (do (c/exec :mkdir :-p common-prefix) (c/exec :mkdir :-p data-dir) + (c/exec :mkdir :-p coordination-data-dir) (c/exec :mkdir :-p logs-dir) (c/exec :mkdir :-p configs-dir) (c/exec :mkdir :-p sub-configs-dir) diff --git a/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper_long.sql b/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper_long.sql index 50f51510d61..07fba5d39b4 100644 --- a/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper_long.sql +++ b/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper_long.sql @@ -12,6 +12,7 @@ SYSTEM SYNC REPLICA byte_identical_r2; ALTER TABLE byte_identical_r1 ADD COLUMN y UInt64 DEFAULT rand(); SYSTEM SYNC REPLICA byte_identical_r1; SYSTEM SYNC REPLICA byte_identical_r2; +SET replication_alter_partitions_sync=2; OPTIMIZE TABLE byte_identical_r1 PARTITION tuple() FINAL; SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 SEMI LEFT JOIN byte_identical_r2 t2 USING x ORDER BY x; diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference index e69de29bb2d..c3165c3d6ef 100644 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table +Consistency: 1 diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index 793fc8e9575..19f72120912 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -3,15 +3,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib set -e $CLICKHOUSE_CLIENT -n -q " - DROP TABLE IF EXISTS alter_table; - DROP TABLE IF EXISTS alter_table2; + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; - CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; - CREATE TABLE alter_table2 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 " function thread1() @@ -22,22 +24,22 @@ function thread1() function thread2() { - while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done + while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table0 MODIFY COLUMN h UInt64; ALTER TABLE alter_table0 DROP COLUMN h;"; done } function thread3() { - while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done + while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table0 SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? 
NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done } function thread4() { - while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table FINAL"; done + while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done } function thread5() { - while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done + while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done } # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout @@ -74,8 +76,9 @@ timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null & wait +check_replication_consistency "alter_table" "count(), sum(a), sum(b), round(sum(c))" -$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table;" & -$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table2;" & +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table0;" & +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table1;" & wait diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference index e69de29bb2d..6e705f05f04 100644 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table_ +Consistency: 1 diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 32fe31f68c6..bdad08fb0e1 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib set -e @@ -99,6 +101,8 @@ timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from Zo wait +check_replication_consistency "alter_table_" "count(), sum(a), sum(b), round(sum(c))" + for i in {0..9}; do $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i" 2>&1 | grep "was not completely removed from ZooKeeper" & done diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.reference b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.reference new file mode 100644 index 00000000000..fe9cba71c4c --- /dev/null +++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.reference @@ -0,0 +1,68 @@ +2000-10-10 1 +2000-10-10 2 +2100-10-10 3 +2100-10-10 4 +2000-10-11 00:00:00 2000-10-11 00:00:00 +2000-10-11 00:00:00 2000-10-11 00:00:00 +2100-10-11 00:00:00 2100-10-11 00:00:00 +2100-10-11 00:00:00 2100-10-11 00:00:00 +2100-10-10 3 +2100-10-10 4 +============= +1 a +2 b +3 c +4 d +2000-01-01 00:00:00 2100-01-01 00:00:00 +1 a +3 c +============= +1 a +3 c +2000-01-01 00:00:00 2000-01-01 00:00:00 +============= +1 a +2 b +3 c +4 d +1 a +2 +3 c +4 +============= +1 a +2 +3 c +4 +1 +2 +3 +4 +============= +1 a +2 b +3 c +4 d +2000-01-01 00:00:00 2100-01-01 00:00:00 +1 a +2 b +4 d +============= +1 a +2 b +4 d +1 +2 +4 d +============= +1 a aa +2 b bb +3 c cc +4 d dd +1 a +2 b bb +3 cc +4 d +1 +============= +0 diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql new file mode 100644 index 00000000000..aafed1a7bce --- /dev/null +++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql @@ -0,0 +1,107 @@ +set mutations_sync = 2; + +drop table if exists ttl; + +create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1); +insert into ttl values (toDateTime('2000-10-10 00:00:00'), 2); +insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3); +insert into ttl values (toDateTime('2100-10-10 00:00:00'), 4); + + +alter table ttl modify ttl d + interval 1 day; +select * from ttl order by a; +select delete_ttl_info_min, delete_ttl_info_max from system.parts where database = currentDatabase() and table = 'ttl' and active > 0 order by name asc; +optimize table ttl final; +select * from ttl order by a; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String) engine = MergeTree order by i +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +insert into ttl values (1, 'a') (2, 'b') (3, 'c') (4, 'd'); + +alter table ttl modify ttl i % 2 = 0 ? 
toDate('2000-01-01') : toDate('2100-01-01'); +select * from ttl order by i; +select delete_ttl_info_min, delete_ttl_info_max from system.parts where database = currentDatabase() and table = 'ttl' and active > 0; +optimize table ttl final; +select * from ttl order by i; +select '============='; + +alter table ttl modify ttl toDate('2000-01-01'); +select * from ttl order by i; +select delete_ttl_info_min, delete_ttl_info_max from system.parts where database = currentDatabase() and table = 'ttl' and active > 0; +optimize table ttl final; +select * from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String) engine = MergeTree order by i +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +insert into ttl values (1, 'a') (2, 'b') (3, 'c') (4, 'd'); + +alter table ttl modify column s String ttl i % 2 = 0 ? today() - 10 : toDate('2100-01-01'); +select * from ttl order by i; +optimize table ttl final; +select * from ttl order by i; +select '============='; + +alter table ttl modify column s String ttl toDate('2000-01-01'); +select * from ttl order by i; +optimize table ttl final; +select * from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (d Date, i Int, s String) engine = MergeTree order by i +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +insert into ttl values (toDate('2000-01-02'), 1, 'a') (toDate('2000-01-03'), 2, 'b') (toDate('2080-01-01'), 3, 'c') (toDate('2080-01-03'), 4, 'd'); + +alter table ttl modify ttl i % 3 = 0 ? toDate('2000-01-01') : toDate('2100-01-01'); +select i, s from ttl order by i; +select delete_ttl_info_min, delete_ttl_info_max from system.parts where database = currentDatabase() and table = 'ttl' and active > 0; +optimize table ttl final; +select i, s from ttl order by i; +select '============='; + +alter table ttl modify column s String ttl d + interval 1 month; +select i, s from ttl order by i; +optimize table ttl final; +select i, s from ttl order by i; +select '============='; + +drop table if exists ttl; + +create table ttl (i Int, s String, t String) engine = MergeTree order by i +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +insert into ttl values (1, 'a', 'aa') (2, 'b', 'bb') (3, 'c', 'cc') (4, 'd', 'dd'); + +alter table ttl modify column s String ttl i % 3 = 0 ? today() - 10 : toDate('2100-01-01'), + modify column t String ttl i % 3 = 1 ? 
today() - 10 : toDate('2100-01-01'); + +select i, s, t from ttl order by i; +optimize table ttl final; +select i, s, t from ttl order by i; +-- MATERIALIZE TTL ran only once +select count() from system.mutations where database = currentDatabase() and table = 'ttl' and is_done; +select '============='; + +drop table if exists ttl; + +-- Nothing changed, don't run mutation +create table ttl (i Int, s String ttl toDate('2000-01-02')) engine = MergeTree order by i +SETTINGS max_number_of_merges_with_ttl_in_pool=0,materialize_ttl_recalculate_only=true; + +alter table ttl modify column s String ttl toDate('2000-01-02'); +select count() from system.mutations where database = currentDatabase() and table = 'ttl' and is_done; + +drop table if exists ttl; diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference index af33a5bfc3f..4b640354c1b 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference @@ -1,6 +1,8 @@ Starting alters Finishing alters Equal number of columns +Replication did not hang: synced all replicas of concurrent_alter_add_drop_ +Consistency: 1 0 0 0 diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index fd0b53cf122..4b67a03760b 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib REPLICAS=3 @@ -101,6 +103,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte sleep 1 done +check_replication_consistency "concurrent_alter_add_drop_" "count(), sum(key), sum(cityHash64(value0))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_add_drop_$i" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'" diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference index ff9c6824f00..435b1b1f1ae 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference @@ -5,6 +5,8 @@ 1725 Starting alters Finishing alters +Replication did not hang: synced all replicas of concurrent_alter_mt_ +Consistency: 1 1 0 1 diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index 37d880bdce7..acbb01a1c68 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib REPLICAS=5 @@ -112,6 +114,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte sleep 1 done +check_replication_consistency "concurrent_alter_mt_" "count(), sum(key), sum(cityHash64(value1)), sum(cityHash64(value2))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_mt_$i" $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_alter_mt_$i" diff --git a/tests/queries/0_stateless/01154_move_partition_long.reference b/tests/queries/0_stateless/01154_move_partition_long.reference index c6d9204ed02..37f0181524e 100644 --- a/tests/queries/0_stateless/01154_move_partition_long.reference +++ b/tests/queries/0_stateless/01154_move_partition_long.reference @@ -1 +1,3 @@ -Replication did not hang +Replication did not hang: synced all replicas of dst_ +Consistency: 1 +Replication did not hang: synced all replicas of src_ diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 1b5985b9942..541550160f2 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib declare -A engines engines[0]="MergeTree" @@ -116,13 +118,8 @@ timeout $TIMEOUT bash -c optimize_thread & timeout $TIMEOUT bash -c drop_part_thread & wait -for ((i=0; i<16; i++)) do - # The size of log is big, so increase timeout. - $CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA dst_$i" & - $CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null & -done -wait -echo "Replication did not hang" +check_replication_consistency "dst_" "count(), sum(p), sum(k), sum(v)" +try_sync_replicas "src_" for ((i=0; i<16; i++)) do $CLICKHOUSE_CLIENT -q "DROP TABLE dst_$i" 2>&1| grep -Fv "is already started to be removing" & diff --git a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference index 4ade9cd9c5d..febec683df9 100644 --- a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference +++ b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference @@ -3,6 +3,11 @@ distributed_group_by_no_merge 1 optimize_skip_unused_shards 1 -optimize_skip_unused_shards lack of WHERE +optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=0) +0 +1 +optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=1) +0 +1 0 1 diff --git a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql index 5b45bea9046..8024abc75ed 100644 --- a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql +++ b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql @@ -12,8 +12,15 @@ SELECT DISTINCT id FROM dist_01213 WHERE id = 1 SETTINGS distributed_group_by_no SELECT 'optimize_skip_unused_shards'; SELECT DISTINCT id FROM dist_01213 WHERE id = 1 SETTINGS optimize_skip_unused_shards=1; -- check that querying all shards is ok -SELECT 'optimize_skip_unused_shards lack of WHERE'; -SELECT DISTINCT id FROM dist_01213 SETTINGS 
optimize_skip_unused_shards=1; +SELECT 'optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=0)'; +SELECT DISTINCT id FROM dist_01213 SETTINGS optimize_skip_unused_shards=1, optimize_distributed_group_by_sharding_key=0; +-- with optimize_distributed_group_by_sharding_key=1 there will be 4 rows, +-- since DISTINCT will be done on each shard separately, and the initiator will +-- not do anything (since we use optimize_skip_unused_shards=1 that must +-- guarantee that the data had been INSERTed according to sharding key, +-- which is not our case, since we use one local table). +SELECT 'optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=1)'; +SELECT DISTINCT id FROM dist_01213 SETTINGS optimize_skip_unused_shards=1, optimize_distributed_group_by_sharding_key=1; DROP TABLE local_01213; DROP TABLE dist_01213; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 95f9e407f21..46eb3bf9ba8 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -45,11 +45,13 @@ CREATE TABLE [] TABLE CREATE CREATE VIEW [] VIEW CREATE CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE +CREATE FUNCTION [] DATABASE CREATE CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP +DROP FUNCTION [] DATABASE DROP DROP [] \N ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 6fcbf194614..5079f5c3d00 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -1,5 +1,3 @@ send_timeout Seconds 300 -connect_timeout Seconds 10 -connect_timeout_with_failover_ms Milliseconds 2000 -connect_timeout_with_failover_secure_ms Milliseconds 3000 +function_range_max_elements_in_block UInt64 500000000 max_memory_usage UInt64 10000000000 diff --git a/tests/queries/0_stateless/01293_show_settings.sql b/tests/queries/0_stateless/01293_show_settings.sql index 08f00ed201c..abdb06ff05e 100644 --- a/tests/queries/0_stateless/01293_show_settings.sql +++ b/tests/queries/0_stateless/01293_show_settings.sql @@ -1,3 +1,3 @@ show settings like 'send_timeout'; -SHOW SETTINGS ILIKE '%CONNECT_timeout%'; +SHOW SETTINGS ILIKE '%RANGE_max%'; SHOW CHANGED SETTINGS ILIKE '%MEMORY%'; diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 1acae560c93..85a2e893f37 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -98,6 +98,7 @@ SELECT '*** disable the feature'; ALTER TABLE execute_on_single_replica_r1 MODIFY SETTING execute_merges_on_single_replica_time_threshold=0; ALTER TABLE execute_on_single_replica_r2 MODIFY SETTING execute_merges_on_single_replica_time_threshold=0; +SET replication_alter_partitions_sync=2; /* all_0_0_6 - we disabled the feature, both replicas will merge */ OPTIMIZE TABLE execute_on_single_replica_r2 FINAL; /* all_0_0_7 - same */ diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference
b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference index f7c65e36be4..c68053e8270 100644 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference @@ -14,3 +14,5 @@ CREATE TABLE default.concurrent_kill_4\n(\n `key` UInt64,\n `value` Int64\ Metadata version on replica 5 equal with first replica, OK CREATE TABLE default.concurrent_kill_5\n(\n `key` UInt64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01593_concurrent_alter_mutations_kill_many_replicas_long_default/{shard}\', \'{replica}5\')\nORDER BY key\nSETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192 499999500000 +Replication did not hang: synced all replicas of concurrent_kill_ +Consistency: 1 diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh index e263750c431..bb04facba15 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib REPLICAS=5 @@ -59,10 +61,6 @@ timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null & wait -for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_kill_$i" -done - # with timeout alter query can be not finished yet, so to execute new alter # we use retries counter=0 @@ -80,7 +78,7 @@ while true; do done -metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'") +metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r11/' and name = 'metadata_version'") for i in $(seq $REPLICAS); do replica_metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'") @@ -95,6 +93,8 @@ done $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM concurrent_kill_1" +check_replication_consistency "concurrent_kill_" "count(), sum(key), sum(cityHash64(value))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_kill_$i" done diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index 04204ebf02a..ee326a77834 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -28,3 +28,7 @@ foo -- vector xx.blogspot.co.at +-- no new line +foo.bar +a.foo.bar +foo.baz diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql index ceb00d5ff19..92ce28828f8 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ b/tests/queries/0_stateless/01601_custom_tld.sql @@ -37,3 +37,8 @@ select 
cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_lis select '-- vector'; select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); + +select '-- no new line'; +select cutToFirstSignificantSubdomainCustom('foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom('a.foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom('a.foo.baz', 'no_new_line_list'); diff --git a/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 b/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 index f2b13e9824b..d1de6d06593 100644 --- a/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 +++ b/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 @@ -42,7 +42,6 @@ SELECT sum(a) + sum(t_ab2.a) - 1, sum(b) + sum(t_ab2.b) - 1 FROM t_ab1 RIGHT JOI SELECT sum(a) + sum(t_ab2.a) - 1, sum(b) + sum(t_ab2.b) - 1 FROM t_ab1 INNER JOIN t_ab2 ON (t_ab1.a == t_ab2.a AND t_ab1.b == t_ab2.b); SELECT '= types ='; - SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' FROM t_ab1 FULL JOIN t_ab2 USING (a, b); SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' FROM t_ab1 LEFT JOIN t_ab2 USING (a, b); SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' FROM t_ab1 RIGHT JOIN t_ab2 USING (a, b); diff --git a/tests/queries/0_stateless/01856_create_function.reference b/tests/queries/0_stateless/01856_create_function.reference new file mode 100644 index 00000000000..a211b2318a0 --- /dev/null +++ b/tests/queries/0_stateless/01856_create_function.reference @@ -0,0 +1,2 @@ +24 +1 diff --git a/tests/queries/0_stateless/01856_create_function.sql b/tests/queries/0_stateless/01856_create_function.sql new file mode 100644 index 00000000000..7e2f38c2415 --- /dev/null +++ b/tests/queries/0_stateless/01856_create_function.sql @@ -0,0 +1,13 @@ +CREATE FUNCTION 01856_test_function_0 AS (a, b, c) -> a * b * c; +SELECT 01856_test_function_0(2, 3, 4); +SELECT isConstant(01856_test_function_0(1, 2, 3)); +DROP FUNCTION 01856_test_function_0; +CREATE FUNCTION 01856_test_function_1 AS (a, b) -> a || b || c; --{serverError 47} +CREATE FUNCTION 01856_test_function_1 AS (a, b) -> 01856_test_function_1(a, b) + 01856_test_function_1(a, b); --{serverError 600} +CREATE FUNCTION cast AS a -> a + 1; --{serverError 598} +CREATE FUNCTION sum AS (a, b) -> a + b; --{serverError 598} +CREATE FUNCTION 01856_test_function_2 AS (a, b) -> a + b; +CREATE FUNCTION 01856_test_function_2 AS (a) -> a || '!!!'; --{serverError 598} +DROP FUNCTION 01856_test_function_2; +DROP FUNCTION unknown_function; -- {serverError 46} +DROP FUNCTION CAST; -- {serverError 599} diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference index d00491fd7e5..e5a8ecd20b4 100644 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference @@ -1 +1,3 @@ +Replication did not hang: synced all replicas of ttl_table +Consistency: 1 1
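Since 01856_create_function introduces SQL-defined lambda functions, a short usage sketch may help; the function name below is hypothetical, and the behavior shown (the lambda body is substituted into the query as an expression, so constant inputs fold to a constant) is what the test's isConstant check observes.

#!/usr/bin/env bash
# A SQL UDF expands into the query as an expression, so calling it on
# constants yields a constant result.
$CLICKHOUSE_CLIENT -nm -q "
    CREATE FUNCTION linear_demo AS (x, k, b) -> k * x + b;  -- hypothetical name
    SELECT linear_demo(10, 2, 4);                     -- 24
    SELECT linear_demo(number, 2, 4) FROM numbers(3); -- 4, 6, 8
    DROP FUNCTION linear_demo;
"

diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh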
b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 13086879e0d..80022bd472d 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib NUM_REPLICAS=5 @@ -58,14 +60,16 @@ timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & wait - for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA ttl_table$i" + # disable ttl merges before checking consistency + $CLICKHOUSE_CLIENT --query "ALTER TABLE ttl_table$i MODIFY SETTING max_replicated_merges_with_ttl_in_queue=0" done +check_replication_consistency "ttl_table" "count(), sum(toUInt64(key))" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}' and type='MERGE_PARTS' and last_exception != '' FORMAT Vertical" $CLICKHOUSE_CLIENT --query "SELECT COUNT() > 0 FROM system.part_log where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}'" + for i in $(seq 1 $NUM_REPLICAS); do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttl_table$i" & done diff --git a/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.reference b/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.reference new file mode 100644 index 00000000000..abb96cefb59 --- /dev/null +++ b/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.reference @@ -0,0 +1,4 @@ +one +one +two +two diff --git a/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.sql b/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.sql new file mode 100644 index 00000000000..9e25df0f41a --- /dev/null +++ b/tests/queries/0_stateless/02012_changed_enum_type_non_replicated.sql @@ -0,0 +1,8 @@ +create table enum_alter_issue (a Enum8('one' = 1, 'two' = 2)) engine = MergeTree() ORDER BY a; +insert into enum_alter_issue values ('one'), ('two'); +alter table enum_alter_issue modify column a Enum8('one' = 1, 'two' = 2, 'three' = 3); +insert into enum_alter_issue values ('one'), ('two'); +alter table enum_alter_issue detach partition id 'all'; +alter table enum_alter_issue attach partition id 'all'; +select * from enum_alter_issue order by a; +drop table enum_alter_issue;
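The 02012 enum tests hinge on one storage detail: appending a value to an Enum8 keeps the on-disk Int8 column unchanged, so parts written before the ALTER remain attachable, whereas switching between Enum8 and Enum16 (exercised by the _incompatible test further down) changes the physical width, and ATTACH must fail. A self-contained sketch with a hypothetical table name:

#!/usr/bin/env bash
# Extending an Enum8 is metadata-only: detached parts still match the schema.
$CLICKHOUSE_CLIENT -nm -q "
    DROP TABLE IF EXISTS enum_demo;
    CREATE TABLE enum_demo (a Enum8('one' = 1, 'two' = 2)) ENGINE = MergeTree ORDER BY a;
    INSERT INTO enum_demo VALUES ('one'), ('two');
    ALTER TABLE enum_demo MODIFY COLUMN a Enum8('one' = 1, 'two' = 2, 'three' = 3);
    ALTER TABLE enum_demo DETACH PARTITION ID 'all';
    ALTER TABLE enum_demo ATTACH PARTITION ID 'all';  -- succeeds: same Int8 layout
    SELECT * FROM enum_demo ORDER BY a;
    DROP TABLE enum_demo;
"

diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.reference b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.reference new file mode 100644 index 00000000000..a83c714a5cf --- /dev/null +++ b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.reference @@ -0,0 +1,4 @@ +one 1 +two 2 +one 3 +two 4 diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.sql b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.sql new file mode 100644 index 00000000000..0c95c7ff403 --- /dev/null +++ b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type.sql @@ -0,0 +1,13 @@ +create table enum_alter_issue (a Enum8('one' = 1, 'two' = 2), b Int) +engine = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02012/enum_alter_issue', 'r1') +ORDER BY a; + +insert into enum_alter_issue values ('one', 1), ('two', 2); +alter table enum_alter_issue modify column a Enum8('one' = 1, 'two' = 2, 'three' = 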
3); +insert into enum_alter_issue values ('one', 3), ('two', 4); + +alter table enum_alter_issue detach partition id 'all'; +alter table enum_alter_issue attach partition id 'all'; +select * from enum_alter_issue order by b; + +drop table enum_alter_issue; diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql new file mode 100644 index 00000000000..e86023c96fa --- /dev/null +++ b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql @@ -0,0 +1,12 @@ +drop table if exists enum_alter_issue; +create table enum_alter_issue (a Enum16('one' = 1, 'two' = 2), b Int) +engine = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02012/enum_alter_issue', 'r2') +ORDER BY b; + +insert into enum_alter_issue values ('one', 1), ('two', 1); +alter table enum_alter_issue detach partition id 'all'; +alter table enum_alter_issue modify column a Enum8('one' = 1, 'two' = 2, 'three' = 3); +insert into enum_alter_issue values ('one', 1), ('two', 1); + +alter table enum_alter_issue attach partition id 'all'; -- {serverError TYPE_MISMATCH} +drop table enum_alter_issue; diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.go b/tests/queries/0_stateless/02013_zlib_read_after_eof.go new file mode 100644 index 00000000000..a97a1438bdf --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.go @@ -0,0 +1,61 @@ +package main + +import ( + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "os" +) + +func compress(data io.Reader) io.Reader { + pr, pw := io.Pipe() + gw := gzip.NewWriter(pw) + + go func() { + _, _ = io.Copy(gw, data) + gw.Close() + pw.Close() + }() + + return pr +} + +func main() { + database := os.Getenv("CLICKHOUSE_DATABASE") + p, err := url.Parse("http://localhost:8123/") + if err != nil { + panic(err) + } + q := p.Query() + + q.Set("query", "INSERT INTO "+database+".graphite FORMAT RowBinary") + p.RawQuery = q.Encode() + queryUrl := p.String() + + var req *http.Request + + req, err = http.NewRequest("POST", queryUrl, compress(os.Stdin)) + req.Header.Add("Content-Encoding", "gzip") + + if err != nil { + panic(err) + } + + client := &http.Client{ + Transport: &http.Transport{DisableKeepAlives: true}, + } + resp, err := client.Do(req) + if err != nil { + panic(err) + } + defer resp.Body.Close() + + body, _ := ioutil.ReadAll(resp.Body) + + if resp.StatusCode != 200 { + panic(fmt.Errorf("clickhouse response status %d: %s", resp.StatusCode, string(body))) + } +}
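The Go helper above streams a gzip-compressed body into the HTTP interface; judging by the test name, the point is that the server's zlib reader must stop cleanly at the logical end of the stream instead of reading past it. For intuition only, a rough curl equivalent (the real test pipes a prepared RowBinary payload through the Go program; the TSV payload and table t here are hypothetical):

#!/usr/bin/env bash
# Compress the request body and let the server decompress it; the
# Content-Encoding: gzip header routes it through the server-side zlib reader.
printf '1\n2\n3\n' | gzip | curl -sS \
    -H 'Content-Encoding: gzip' \
    --data-binary @- \
    'http://localhost:8123/?query=INSERT%20INTO%20t%20FORMAT%20TSV'

diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.reference b/tests/queries/0_stateless/02013_zlib_read_after_eof.reference new file mode 100644 index 00000000000..5caff40c4a0 --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.reference @@ -0,0 +1 @@ +10000 diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.sh b/tests/queries/0_stateless/02013_zlib_read_after_eof.sh new file mode 100755 index 00000000000..d74dca6cc61 --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 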
"$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CUR_DIR/data_zlib/02013_zlib_read_after_eof_data + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS graphite;" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE graphite(\`Path\` String, \`Value\` Float64, \`Time\` UInt32, \`Date\` Date, \`Timestamp\` UInt32) \ + ENGINE = MergeTree PARTITION BY toYYYYMM(Date) ORDER BY (Path, Time) SETTINGS index_granularity = 8192;" + +cat "$DATA_FILE" | go run $CUR_DIR/02013_zlib_read_after_eof.go + +$CLICKHOUSE_CLIENT -q "SELECT count() FROM graphite;" + +$CLICKHOUSE_CLIENT -q "drop table graphite;" diff --git a/tests/queries/0_stateless/02014_map_different_keys.reference b/tests/queries/0_stateless/02014_map_different_keys.reference new file mode 100644 index 00000000000..8af8f57f9df --- /dev/null +++ b/tests/queries/0_stateless/02014_map_different_keys.reference @@ -0,0 +1,22 @@ +...const maps... +0 +2 +0 +4 +0 +0 +2 +0 +4 +0 +4 +4 +...int keys... +foo bar bar + foo foo +...string keys... +foo foo +foo foo +foo foo +bar bar +0 diff --git a/tests/queries/0_stateless/02014_map_different_keys.sql b/tests/queries/0_stateless/02014_map_different_keys.sql new file mode 100644 index 00000000000..0998a9283f7 --- /dev/null +++ b/tests/queries/0_stateless/02014_map_different_keys.sql @@ -0,0 +1,32 @@ +SELECT '...const maps...'; + +WITH map(1, 2, 3, 4) AS m SELECT m[number] FROM numbers(5); +WITH map('1', 2, '3', 4) AS m SELECT m[toString(number)] FROM numbers(5); + +WITH map(1, 2, 3, 4) AS m SELECT m[3]; +WITH map('1', 2, '3', 4) AS m SELECT m['3']; + +DROP TABLE IF EXISTS t_map_02014; + +CREATE TABLE t_map_02014(i1 UInt64, i2 Int32, m1 Map(UInt32, String), m2 Map(Int8, String), m3 Map(Int128, String)) ENGINE = Memory; +INSERT INTO t_map_02014 VALUES (1, -1, map(1, 'foo', 2, 'bar'), map(-1, 'foo', 1, 'bar'), map(-1, 'foo', 1, 'bar')); + +SELECT '...int keys...'; + +SELECT m1[i1], m2[i1], m3[i1] FROM t_map_02014; +SELECT m1[i2], m2[i2], m3[i2] FROM t_map_02014; + +DROP TABLE IF EXISTS t_map_02014; + +CREATE TABLE t_map_02014(s String, fs FixedString(3), m1 Map(String, String), m2 Map(FixedString(3), String)) ENGINE = Memory; +INSERT INTO t_map_02014 VALUES ('aaa', 'bbb', map('aaa', 'foo', 'bbb', 'bar'), map('aaa', 'foo', 'bbb', 'bar')); + +SELECT '...string keys...'; + +SELECT m1['aaa'], m2['aaa'] FROM t_map_02014; +SELECT m1['aaa'::FixedString(3)], m2['aaa'::FixedString(3)] FROM t_map_02014; +SELECT m1[s], m2[s] FROM t_map_02014; +SELECT m1[fs], m2[fs] FROM t_map_02014; +SELECT length(m2['aaa'::FixedString(4)]) FROM t_map_02014; + +DROP TABLE IF EXISTS t_map_02014; diff --git a/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference new file mode 100644 index 00000000000..ef5038b2236 --- /dev/null +++ b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference @@ -0,0 +1,4 @@ +0 \N + +0 \N +0 \N diff --git a/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql new file mode 100644 index 00000000000..c1c8a9c00b1 --- /dev/null +++ b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql @@ -0,0 +1,3 @@ +set receive_timeout = '10', receive_data_timeout_ms = '10000', extremes = '1', allow_suspicious_low_cardinality_types = '1', force_primary_key = '1', join_use_nulls = '1', max_rows_to_read = '1', join_algorithm = 'partial_merge'; + +SELECT * FROM (SELECT dummy AS val FROM system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 
9223372036854775806) = (rval * 1); diff --git a/tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data b/tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data new file mode 100644 index 00000000000..3e57c082462 Binary files /dev/null and b/tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data differ diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index d10ac883764..7d02f9f1b41 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -20,3 +20,23 @@ function wait_for_mutation() done } + +function wait_for_all_mutations() +{ + local table=$1 + local database=$2 + database=${database:="${CLICKHOUSE_DATABASE}"} + + for i in {1..200} + do + sleep 1 + if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT coalesce(minOrNull(is_done), 1) FROM system.mutations WHERE database='$database' AND table like '$table'") -eq 1 ]]; then + break + fi + + if [[ $i -eq 200 ]]; then + echo "Timed out while waiting for mutation to execute!" + fi + + done +} diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib new file mode 100755 index 00000000000..8fe300b59e8 --- /dev/null +++ b/tests/queries/0_stateless/replication.lib @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# shellcheck source=./mergetree_mutations.lib +. "$CURDIR"/mergetree_mutations.lib + +function try_sync_replicas() +{ + table_name_prefix=$1 + + readarray -t empty_partitions_arr < <(${CLICKHOUSE_CLIENT} -q \ + "SELECT DISTINCT substr(new_part_name, 1, position(new_part_name, '_') - 1) AS partition_id + FROM system.replication_queue + WHERE (database = currentDatabase()) AND (table LIKE '$table_name_prefix%') AND (last_exception LIKE '%No active replica has part%') AND (partition_id NOT IN ( + SELECT partition_id + FROM system.parts + WHERE (database = currentDatabase()) AND (table LIKE '$table_name_prefix%') + ))") + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' AND engine like '%Replicated%'") + + for t in "${tables_arr[@]}" + do + for p in "${empty_partitions_arr[@]}" + do + # Avoid "Empty part ... is not created instead of lost part because there are no parts in partition" + $CLICKHOUSE_CLIENT -q "ALTER TABLE $t DROP PARTITION ID '$p'" 2>/dev/null + done + done + + for t in "${tables_arr[@]}" + do + # The size of log may be big, so increase timeout. + $CLICKHOUSE_CLIENT --receive_timeout 400 -q "SYSTEM SYNC REPLICA $t" || $CLICKHOUSE_CLIENT -q \ + "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" & + done + wait + echo "Replication did not hang: synced all replicas of $table_name_prefix" +}
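Before the consistency checker itself, a sketch of how a stateless test is meant to consume this library; the demo_table_ prefix and the aggregate expression are hypothetical, and check_replication_consistency is the helper defined immediately below.

#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=./replication.lib
. "$CURDIR"/replication.lib

# ... spawn concurrent ALTER / INSERT / KILL MUTATION threads against
# replicated tables named demo_table_1 .. demo_table_N, wait for them ...

# One aggregate tuple per replica; replicas have diverged if the tuples differ.
check_replication_consistency "demo_table_" "count(), sum(key), sum(cityHash64(value))"

+ +function check_replication_consistency() +{ + table_name_prefix=$1 + check_query_part=$2 + + # Do not check anything if all replicas are readonly, + # because in this case all replicas are probably lost (it may happen and it's not a bug) + res=$($CLICKHOUSE_CLIENT -q "SELECT count() - sum(is_readonly) FROM system.replicas WHERE database=currentDatabase() AND table LIKE '$table_name_prefix%'") + if [ $res -eq 0 ]; then + # Print dummy lines + echo "Replication did not hang: synced all replicas of $table_name_prefix" + echo "Consistency: 1" + return 0 + fi + + # Trigger pullLogsToQueue(...) and updateMutations(...)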
on some replica to make it pull all mutations, so it will be possible to kill them + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") + $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") + $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: + + # Forcefully cancel mutations to avoid waiting for them to finish + ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null + + # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet + wait_for_all_mutations "$table_name_prefix%" + + try_sync_replicas "$table_name_prefix" + + res=$($CLICKHOUSE_CLIENT -q \ + "SELECT + if((countDistinct(data) as c) == 0, 1, c) + FROM + ( + SELECT _table, ($check_query_part) AS data + FROM merge(currentDatabase(), '$table_name_prefix') GROUP BY _table + )") + + echo "Consistency: $res" + if [ $res -ne 1 ]; then + echo "Replicas have diverged:" + $CLICKHOUSE_CLIENT -q "select 'data', _table, $check_query_part, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$table_name_prefix') group by _table order by _table" + $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, node_name" + $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, mutation_id" + $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, name" + echo "Good luck with debugging..." + fi + +} + diff --git a/tests/queries/0_stateless/test_infile.gz b/tests/queries/0_stateless/test_infile.gz new file mode 100644 index 00000000000..feb3ac52068 Binary files /dev/null and b/tests/queries/0_stateless/test_infile.gz differ diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index c2b5782e766..335ed370b9b 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -161,6 +161,9 @@ "00980_zookeeper_merge_tree_alter_settings", "00980_merge_alter_settings", "02009_array_join_partition", + "02012_changed_enum_type_non_replicated", + "02012_zookeeper_changed_enum_type", + "02012_zookeeper_changed_enum_type_incompatible", /// Old syntax is not allowed "01062_alter_on_mutataion_zookeeper", "00925_zookeeper_empty_replicated_merge_tree_optimize_final", @@ -322,6 +325,7 @@ "01076_parallel_alter_replicated_zookeeper", "01079_parallel_alter_add_drop_column_zookeeper", "01079_parallel_alter_detach_table_zookeeper", + "01079_parallel_alter_modify_zookeeper_long", "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", "01084_regexp_empty", @@ -483,6 +487,8 @@ "01804_dictionary_decimal256_type", "01850_dist_INSERT_preserve_error", // uses cluster with different static databases shard_0/shard_1 "01821_table_comment", + "01856_create_function", + "01857_create_function_and_check_jit_compiled", "01824_prefer_global_in_and_join", "01870_modulo_partition_key", "01870_buffer_flush", // creates database