diff --git a/SECURITY.md b/SECURITY.md index 79ca0269838..86578b188d8 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 24.2 | ✔️ | | 24.1 | ✔️ | | 23.12 | ✔️ | -| 23.11 | ✔️ | +| 23.11 | ❌ | | 23.10 | ❌ | | 23.9 | ❌ | | 23.8 | ✔️ | diff --git a/base/base/Decimal.h b/base/base/Decimal.h index afa186faf5b..66ff623217c 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -1,14 +1,9 @@ #pragma once + #include #include +#include -#if !defined(NO_SANITIZE_UNDEFINED) -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -#else - #define NO_SANITIZE_UNDEFINED -#endif -#endif namespace DB { diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 0b43be38149..9da059c98b6 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -10,14 +10,10 @@ #define JSON_MAX_DEPTH 100 -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop /// Read unsigned integer in a simple form from a non-0-terminated string. 
diff --git a/base/base/JSON.h b/base/base/JSON.h index 850b74715c6..bc053670a96 100644 --- a/base/base/JSON.h +++ b/base/base/JSON.h @@ -39,14 +39,10 @@ // NOLINTBEGIN(google-explicit-constructor) -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop // NOLINTEND(google-explicit-constructor) class JSON diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index b85f1a16d32..99b897c4571 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -13,11 +13,7 @@ #include -# if defined(__clang__) extern "C" void __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) -extern "C" void __gcov_exit(); -# endif #endif @@ -28,12 +24,7 @@ void dumpCoverageReportIfPossible() static std::mutex mutex; std::lock_guard lock(mutex); -# if defined(__clang__) __llvm_profile_dump(); // NOLINT -# elif defined(__GNUC__) || defined(__GNUG__) - __gcov_exit(); -# endif - #endif } diff --git a/base/base/defines.h b/base/base/defines.h index 02058a29096..1f02748633d 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -11,7 +11,7 @@ /// including /// - it should not have fallback to 0, /// since this may create false-positive detection (common problem) -#if defined(__clang__) && defined(__has_feature) +#if defined(__has_feature) # define ch_has_feature __has_feature #endif @@ -76,24 +76,11 @@ /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. /// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. 
-#if defined(__clang__) -# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) -#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. -# define NO_SANITIZE_UNDEFINED -# define NO_SANITIZE_ADDRESS -# define NO_SANITIZE_THREAD -# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE -#endif - -#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14 -# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) -#else -# define DISABLE_SANITIZER_INSTRUMENTATION -#endif - +#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) +#define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +#define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) +#define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation)) #if !__has_include() || !defined(ADDRESS_SANITIZER) # define ASAN_UNPOISON_MEMORY_REGION(a, b) @@ -135,54 +122,33 @@ /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers. /// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader -#if defined(__clang__) -# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability -# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability -# define TSA_REQUIRES(...) 
__attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability -# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability -# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock -# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function -# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability -# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it -# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure -# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability -# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it -# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure -# define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability -# define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability +#define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability +#define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability +#define TSA_REQUIRES(...) 
__attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability +#define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability +#define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock +#define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function +#define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability +#define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it +#define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure +#define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability +#define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it +#define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure +#define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability +#define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. 
This enable us to suppress warnings locally instead of /// suppressing them in the whole function /// Consider adding a comment when using these macros. -# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) +#define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) /// This macro is useful when only one thread writes to a member /// and you want to read this member from the same thread without locking a mutex. /// It's safe (because no concurrent writes are possible), but TSA generates a warning. /// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) - -#else -# define TSA_GUARDED_BY(...) -# define TSA_PT_GUARDED_BY(...) -# define TSA_REQUIRES(...) -# define TSA_REQUIRES_SHARED(...) -# define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_CAPABILITY(...) -# define TSA_ACQUIRE(...) -# define TSA_TRY_ACQUIRE(...) -# define TSA_RELEASE(...) -# define TSA_ACQUIRE_SHARED(...) -# define TSA_TRY_ACQUIRE_SHARED(...) -# define TSA_RELEASE_SHARED(...) -# define TSA_SCOPED_LOCKABLE - -# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) -# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) -#endif +#define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) /// A template function for suppressing warnings about unused variables or function results. 
template diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h index f531a56031b..5c601251272 100644 --- a/base/base/iostream_debug_helpers.h +++ b/base/base/iostream_debug_helpers.h @@ -155,9 +155,7 @@ Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-mis return dumpValue(out, x) << "; "; } -#ifdef __clang__ #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" -#endif #define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR)); #define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] "; diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 7d37f01b560..802d1bf35f5 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -11,10 +11,8 @@ /// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own. #ifdef USE_PHDR_CACHE -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# pragma clang diagnostic ignored "-Wunused-macros" -#endif +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#pragma clang diagnostic ignored "-Wunused-macros" #define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) @@ -57,10 +55,6 @@ std::atomic phdr_cache {}; extern "C" -#ifndef __clang__ -[[gnu::visibility("default")]] -[[gnu::externally_visible]] -#endif int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data) { auto * current_phdr_cache = phdr_cache.load(); diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 44a9f979f99..ea5cff9fc11 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -20,11 +20,7 @@ /// Suppress TSan since it is possible for this code to be called from multiple threads, /// and initialization is safe to be done multiple times from multiple threads. 
-#if defined(__clang__) -# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) -#else -# define NO_SANITIZE_THREAD -#endif +#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) // We don't have libc struct available here. // Compute aux vector manually (from /proc/self/auxv). diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 78796ca0c05..54b552a84ea 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -6,11 +6,7 @@ /// It is only enabled in debug build (its intended use is for CI checks). #if !defined(NDEBUG) -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" -#else - #pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" -#endif +#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" /// We cannot use libc headers here. long write(int, const void *, unsigned long); diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index d39ca312454..2f42854a972 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 2d07937ad79..7bd777de5b9 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 
d4775b17319..03d01cfd5d7 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.1.5.6" +ARG VERSION="24.2.1.2248" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 81e442e65b6..09198ca1968 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -20,6 +20,8 @@ if [ -n "$WITH_LOCAL_BINARY" ]; then clickhouse_source="--clickhouse-source /clickhouse" fi +# $TESTS_TO_RUN comes from docker +# shellcheck disable=SC2153 tests_count="--test-count $TESTS_TO_RUN" tests_to_run="test-all" workload="" @@ -47,6 +49,6 @@ fi cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse" -(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" +(lein run server $tests_to_run "$workload" --keeper "$KEEPER_NODE" "$concurrency" "$nemesis" "$rate" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 "$clickhouse_source" "$tests_count" --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" mv store "$TEST_OUTPUT/" diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index f4738cdc333..d288288bb17 100644 --- a/docker/test/stateless/attach_gdb.lib +++ 
b/docker/test/stateless/attach_gdb.lib @@ -1,5 +1,6 @@ #!/bin/bash +# shellcheck source=./utils.lib source /utils.lib function attach_gdb_to_clickhouse() diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 72c2d8d2f2d..c0fc32ab718 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -19,7 +19,7 @@ function escaped() function head_escaped() { - head -n $FAILURE_CONTEXT_LINES $1 | escaped + head -n "$FAILURE_CONTEXT_LINES" "$1" | escaped } function unts() @@ -29,15 +29,15 @@ function unts() function trim_server_logs() { - head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped + head -n "$FAILURE_CONTEXT_LINES" "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped } function install_packages() { - dpkg -i $1/clickhouse-common-static_*.deb - dpkg -i $1/clickhouse-common-static-dbg_*.deb - dpkg -i $1/clickhouse-server_*.deb - dpkg -i $1/clickhouse-client_*.deb + dpkg -i "$1"/clickhouse-common-static_*.deb + dpkg -i "$1"/clickhouse-common-static-dbg_*.deb + dpkg -i "$1"/clickhouse-server_*.deb + dpkg -i "$1"/clickhouse-client_*.deb } function configure() @@ -54,11 +54,11 @@ function configure() sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml function randomize_config_boolean_value { - value=$(($RANDOM % 2)) - sudo cat /etc/clickhouse-server/config.d/$2.xml \ + value=$((RANDOM % 2)) + sudo cat "/etc/clickhouse-server/config.d/$2.xml" \ | sed "s|<$1>[01]|<$1>$value|" \ - > /etc/clickhouse-server/config.d/$2.xml.tmp - sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml + > "/etc/clickhouse-server/config.d/$2.xml.tmp" + sudo mv "/etc/clickhouse-server/config.d/$2.xml.tmp" "/etc/clickhouse-server/config.d/$2.xml" } if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then @@ -146,17 +146,17 @@ EOL } 
-function stop() +function stop_server() { - local max_tries="${1:-90}" - local check_hang="${2:-true}" + local max_tries=90 + local check_hang=true local pid # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" clickhouse stop --max-tries "$max_tries" --do-not-kill && return - if [ $check_hang == true ] + if [ "$check_hang" == true ] then # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. # Add a special status just in case, so it will be possible to find in the CI DB @@ -165,7 +165,7 @@ function stop() sleep 5 # The server could finally stop while we were terminating gdb, let's recheck if it's still running - kill -s 0 $pid || return + kill -s 0 "$pid" || return echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log @@ -176,12 +176,13 @@ function stop() fi } -function start() +function start_server() { counter=0 + max_attempt=120 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt ${1:-120} ] + if [ "$counter" -gt "$max_attempt" ] then echo "Cannot start clickhouse-server" rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: @@ -286,9 +287,9 @@ function collect_query_and_trace_logs() function collect_core_dumps() { - find . -type f -maxdepth 1 -name 'core.*' | while read core; do - zstd --threads=0 $core - mv $core.zst /test_output/ + find . 
-type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ done } diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 3981c4dd416..621a6ced7f6 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib install_packages package_folder @@ -55,7 +57,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml -start +start_server setup_logs_replication @@ -65,7 +67,7 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log # Randomize cache policies. 
@@ -85,7 +87,7 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi -start +start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" @@ -188,7 +190,7 @@ clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "SYSTEM STOP THREAD FUZZER" -stop +stop_server # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 @@ -222,7 +224,7 @@ if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then > /etc/clickhouse-server/config.d/enable_async_load_databases.xml fi -start +start_server stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ @@ -232,18 +234,18 @@ stress --hung-check --drop-databases --output-folder test_output --skip-func-tes rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" >> /test_output/test_results.tsv -stop +stop_server mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log # NOTE Disable thread fuzzer before server start with data after stress test. # In debug build it can take a lot of time. 
unset "${!THREAD_@}" -start +start_server check_server_start -stop +stop_server [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" @@ -272,7 +274,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Signal 9%') DESC, (test like '%Fatal message%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index f2bac2f5da4..abc2dba0e9d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,7 +16,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ moreutils \ python3-fuzzywuzzy \ python3-pip \ - shellcheck \ yamllint \ locales \ && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ @@ -30,6 +29,19 @@ ENV LC_ALL en_US.UTF-8 # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH +ARG SHELLCHECK_VERSION=0.9.0 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) sarch=x86_64 ;; \ + arm64) sarch=aarch64 ;; \ + esac \ + && curl -L \ + "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.${sarch}.tar.xz" \ + | tar xJ --strip=1 -C /tmp \ + && mv /tmp/shellcheck /usr/bin \ + && rm -rf /tmp/* + + # Get act and actionlint from releases RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ diff --git 
a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index de9ac3b3a69..bf08d537260 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +# shellcheck source=../stateless/attach_gdb.lib source /attach_gdb.lib +# shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -337,7 +339,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Changed settings%') DESC, (test like '%New settings%') DESC, rowNumberInAllBlocks() -LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # But OOMs in stress test are allowed diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md new file mode 100644 index 00000000000..6113dd51ab1 --- /dev/null +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -0,0 +1,462 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.2.1.2248-stable (891689a4150) FIXME as compared to v24.1.1.2048-stable (5a024dfc093) + +#### Backward Incompatible Change +* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)). 
+* The sort clause `ORDER BY ALL` (introduced with v23.12) is replaced by `ORDER BY *`. The previous syntax was too error-prone for tables with a column `all`. [#59450](https://github.com/ClickHouse/ClickHouse/pull/59450) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename the setting extract_kvp_max_pairs_per_row to extract_key_value_pairs_max_pairs_per_row. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)). +* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature +* Added maximum sequential login failures to the quota. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) ([pufit](https://github.com/pufit)). +* Backup & Restore support for AzureBlobStorage resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
+* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)). +* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)). +* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)). +* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)). +* Add function variantType that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)). +* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. 
[#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). +* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)). +* Provides new aggregate function ‘groupArrayIntersect’. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)). +* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented system.dns_cache table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)). +* Support single-argument version for the merge table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)). +* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)). + +#### Performance Improvement +* Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section. 
[#52230](https://github.com/ClickHouse/ClickHouse/pull/52230) ([JackyWoo](https://github.com/JackyWoo)). +* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)). +* Continue optimizing branch miss of if function when result type is float*/decimal*/int* , follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([李扬](https://github.com/taiyang-li)). +* Optimize if function when input type is map, speed up by ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([李扬](https://github.com/taiyang-li)). +* Improve performance of Int8 type by implementing strict aliasing. [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([李扬](https://github.com/taiyang-li)). +* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)). +* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([李扬](https://github.com/taiyang-li)). +* Reuse the result of `FunctionFactory::instance().get("isNotNull", context)` and `FunctionFactory::instance().get("assumeNotNull", context)`. Make sure it is called once during the lifetime of `FunctionCoalesce`. [#59627](https://github.com/ClickHouse/ClickHouse/pull/59627) ([李扬](https://github.com/taiyang-li)). +* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)). +* Primary key will use less amount of memory. 
[#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)). +* As is shown in Fig 1, the replacement of "&&" with "&" could generate the SIMD code. ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/a5a72ac4-6dc6-4d52-835a-4f512e55f0b9) Fig 1. Code compiled from '&&' (left) and '&' (right). [#60498](https://github.com/ClickHouse/ClickHouse/pull/60498) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). + +#### Improvement +* Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
+* Do not consider data part as broken if projection is broken. Closes [#56593](https://github.com/ClickHouse/ClickHouse/issues/56593). [#56864](https://github.com/ClickHouse/ClickHouse/pull/56864) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)). +* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now writes the correct values: The default value for existing parts with default value or the non-default value for existing parts with non-default value. Previously, the default value was written for all existing parts. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)). +* Enabled a backoff logic (e.g. exponential). Will provide an ability for reduced CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add improvement to count InitialQuery. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)). +* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)). +* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)). 
+* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)). +* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)). +* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow parallel and distributed processing for `S3Queue` table engine. For distributed processing use setting `s3queue_total_shards_num` (by default `1`). Setting `s3queue_processing_threads_num` previously was not allowed for Ordered processing mode, now it is allowed. Warning: settings `s3queue_processing_threads_num`(processing threads per each shard) and `s3queue_total_shards_num` for ordered mode change how metadata is stored (make the number of `max_processed_file` nodes equal to `s3queue_processing_threads_num * s3queue_total_shards_num`), so they must be the same for all shards and cannot be changed once at least one shard is created. [#59167](https://github.com/ClickHouse/ClickHouse/pull/59167) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow concurrent table creation in `DatabaseReplicated` during `recoverLostReplica`. 
[#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. ``` false ```. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)). +* Use MergeTree as a default table engine. It makes the usability much better, and closer to ClickHouse Cloud. [#59316](https://github.com/ClickHouse/ClickHouse/pull/59316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)). +* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)). +* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)). +* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)). 
+* Allow to define a starting point for S3Queue with Ordered mode at creation using setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)). +* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow alter operations to be surrounded by parenthesis. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older version of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* The previous default value of 1 MB for `async_insert_max_data_size` appeared to be too small. The new one would be 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)). +* Now dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124) . [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0 . Also fixed a bug for polling timeout mechanism, as we observed in some cases the timeout won't work properly; if the timeout happens, IAA and CPU may process the buffer concurrently. So far, we'd better make sure the IAA codec status is not QPL_STS_BEING_PROCESSED, then fall back to the SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)). +* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* After self-extraction the temporary binary is moved instead of being copied. 
[#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix stack unwinding on MacOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Perform synchronous inserts if dependent MV deduplication is enabled through deduplicate_blocks_in_dependent_materialized_views=1. [#59699](https://github.com/ClickHouse/ClickHouse/pull/59699) ([Julia Kartseva](https://github.com/jkartseva)). +* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and are needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). +* Unify xml and sql created named collection behaviour in kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples) background merges may get stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow uuid in replica_path if CREATE TABLE explicitly has it. 
[#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)). +* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)). +* Keeper improvement: send only Keeper related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new config setting `backups.remove_backup_files_after_failure`: ``` true ```. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)). +* Support specifying users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Implement comparison operator for Variant values and proper Field inserting into Variant column. Don't allow creating `Variant` type with similar variant types by default (allow under a setting `allow_suspicious_variant_types`) Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)). +* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)). +* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)). +* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)). +* When output format is Pretty format and a block consists of a single numeric value which exceeds one million, a readable number will be printed on the right of the table. e.g. ``` ┌──────count()─┐ │ 233765663884 │ -- 233.77 billion └──────────────┘ ```. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)). 
+* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Include `pytest-reportlog` in integration test CI runner Dockerfile to enable JSON test reports. [#58926](https://github.com/ClickHouse/ClickHouse/pull/58926) ([MyroTk](https://github.com/MyroTk)). +* Update the rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)). +* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)). +* If you want to run initdb scripts every time when the ClickHouse container is starting, you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). +* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that require extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)). 
+* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test that validates projections still work after attach partition. [#60415](https://github.com/ClickHouse/ClickHouse/pull/60415) ([Arthur Passos](https://github.com/arthurpassos)). +* Add test that validates attach partition fails if structure differs because of materialized column. [#60418](https://github.com/ClickHouse/ClickHouse/pull/60418) ([Arthur Passos](https://github.com/arthurpassos)). +* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speedup check-whitespaces check. [#60496](https://github.com/ClickHouse/ClickHouse/pull/60496) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Non ready set in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix quantilesGK bug [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)). +* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). +* Fix bug with `intDiv` for decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). +* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). +* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). +* s3queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix crash in JSONColumnsWithMetadata format over http [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not rewrite sum() to count() if return value differs in analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). 
+* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). +* Run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Fix scale conversion for DateTime64 [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix INSERT into SQLite with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). +* Fix several logical errors in arrayFold [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible exception from s3queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). +* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inconsistent formatting of queries [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Allow casting of bools in string representation to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix system.s3queue_log [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix actions execution during preliminary filtering (PK, partition pruning) [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)). +* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert "Replace `ORDER BY ALL` by `ORDER BY *`" [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). +* s3queue: fix bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). +* Merging [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly check keys in s3Cluster [#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* ... [#60457](https://github.com/ClickHouse/ClickHouse/pull/60457) ([Max K.](https://github.com/maxknv)). +* ... [#60512](https://github.com/ClickHouse/ClickHouse/pull/60512) ([Max K.](https://github.com/maxknv)). +* Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). +* ... [#60557](https://github.com/ClickHouse/ClickHouse/pull/60557) ([Max K.](https://github.com/maxknv)). +* BUG: build job can report success cache record on failed build Add a check relying on job report fail. [#60587](https://github.com/ClickHouse/ClickHouse/pull/60587) ([Max K.](https://github.com/maxknv)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Add new aggregation function groupArraySorted()""'. [#59003](https://github.com/ClickHouse/ClickHouse/pull/59003) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Update libxml2 version to address some bogus security issues"'. [#59479](https://github.com/ClickHouse/ClickHouse/pull/59479) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Poco Logger small refactoring"'. [#59509](https://github.com/ClickHouse/ClickHouse/pull/59509) ([Raúl Marín](https://github.com/Algunenano)). 
+* NO CL ENTRY: 'Revert "Revert "Poco Logger small refactoring""'. [#59564](https://github.com/ClickHouse/ClickHouse/pull/59564) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "MergeTree FINAL optimization diagnostics and settings"'. [#59702](https://github.com/ClickHouse/ClickHouse/pull/59702) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Use `MergeTree` as a default table engine"'. [#59711](https://github.com/ClickHouse/ClickHouse/pull/59711) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Rename a setting"'. [#59754](https://github.com/ClickHouse/ClickHouse/pull/59754) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Less error prone interface of read buffers"'. [#59911](https://github.com/ClickHouse/ClickHouse/pull/59911) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Update version_date.tsv and changelogs after v24.1.4.19-stable"'. [#59973](https://github.com/ClickHouse/ClickHouse/pull/59973) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "ReplicatedMergeTree invalid metadata_version fix"'. [#60058](https://github.com/ClickHouse/ClickHouse/pull/60058) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "ReplicatedMergeTree invalid metadata_version fix""'. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Implement system.dns_cache table"'. [#60085](https://github.com/ClickHouse/ClickHouse/pull/60085) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Restriction for the access key id for s3."'. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Do not retry queries if container is down in integration tests"'. 
[#60215](https://github.com/ClickHouse/ClickHouse/pull/60215) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Check stack size in Parser"'. [#60216](https://github.com/ClickHouse/ClickHouse/pull/60216) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Support resource request canceling"'. [#60253](https://github.com/ClickHouse/ClickHouse/pull/60253) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add definers for views"'. [#60350](https://github.com/ClickHouse/ClickHouse/pull/60350) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Update build-osx.md'. [#60380](https://github.com/ClickHouse/ClickHouse/pull/60380) ([rogeryk](https://github.com/rogeryk)). +* NO CL ENTRY: 'Revert "Fix: IAST::clone() for RENAME"'. [#60398](https://github.com/ClickHouse/ClickHouse/pull/60398) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Add table function `mergeTreeIndex`"'. [#60428](https://github.com/ClickHouse/ClickHouse/pull/60428) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Userspace page cache"'. [#60550](https://github.com/ClickHouse/ClickHouse/pull/60550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Analyzer: compute ALIAS columns right after reading"'. [#60570](https://github.com/ClickHouse/ClickHouse/pull/60570) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Analyzer: support aliases and distributed JOINs in StorageMerge [#50894](https://github.com/ClickHouse/ClickHouse/pull/50894) ([Dmitry Novik](https://github.com/novikd)). +* Userspace page cache [#53770](https://github.com/ClickHouse/ClickHouse/pull/53770) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Simplify optimize-push-to-prewhere from query plan [#58554](https://github.com/ClickHouse/ClickHouse/pull/58554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Create ch/chc/chl symlinks by cmake as well (for develop mode) [#58609](https://github.com/ClickHouse/ClickHouse/pull/58609) ([Azat Khuzhin](https://github.com/azat)). +* CI: ci cache. step 1 [#58664](https://github.com/ClickHouse/ClickHouse/pull/58664) ([Max K.](https://github.com/maxknv)). +* Enable building JIT with UBSAN [#58952](https://github.com/ClickHouse/ClickHouse/pull/58952) ([Raúl Marín](https://github.com/Algunenano)). +* Support resource request canceling [#59032](https://github.com/ClickHouse/ClickHouse/pull/59032) ([Sergei Trifonov](https://github.com/serxa)). +* Analyzer: Do not resolve remote table id on initiator [#59073](https://github.com/ClickHouse/ClickHouse/pull/59073) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Add cast for ConstantNode from constant folding [#59121](https://github.com/ClickHouse/ClickHouse/pull/59121) ([Dmitry Novik](https://github.com/novikd)). +* Fix the default value of `async_insert_max_data_size` in EN document [#59161](https://github.com/ClickHouse/ClickHouse/pull/59161) ([Alex Cheng](https://github.com/Alex-Cheng)). +* CI: Add ARM integration tests [#59241](https://github.com/ClickHouse/ClickHouse/pull/59241) ([Max K.](https://github.com/maxknv)). +* Fix getting filename from read buffer wrappers [#59298](https://github.com/ClickHouse/ClickHouse/pull/59298) ([Kruglov Pavel](https://github.com/Avogar)). +* Update AWS SDK to 1.11.234 [#59299](https://github.com/ClickHouse/ClickHouse/pull/59299) ([Nikita Taranov](https://github.com/nickitat)). +* Split `ISlotControl` from `ConcurrencyControl` [#59313](https://github.com/ClickHouse/ClickHouse/pull/59313) ([Sergei Trifonov](https://github.com/serxa)). +* Some small fixes for docker images [#59337](https://github.com/ClickHouse/ClickHouse/pull/59337) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* CI: bugfix-validate, integration, functional test scripts updates [#59348](https://github.com/ClickHouse/ClickHouse/pull/59348) ([Max K.](https://github.com/maxknv)). +* MaterializedMySQL: Fix gtid_after_attach_test to retry on detach [#59370](https://github.com/ClickHouse/ClickHouse/pull/59370) ([Val Doroshchuk](https://github.com/valbok)). +* Poco Logger small refactoring [#59375](https://github.com/ClickHouse/ClickHouse/pull/59375) ([Maksim Kita](https://github.com/kitaisreal)). +* Add sanity checks for function return types [#59379](https://github.com/ClickHouse/ClickHouse/pull/59379) ([Raúl Marín](https://github.com/Algunenano)). +* Cleanup connection pool surroundings [#59380](https://github.com/ClickHouse/ClickHouse/pull/59380) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix ARRAY JOIN with subcolumns [#59382](https://github.com/ClickHouse/ClickHouse/pull/59382) ([vdimir](https://github.com/vdimir)). +* Update curl submodule to be version 8.50 to address the irrelevant CVE-2023-46218 and CVE-2023-49219, which we don't care about at all. [#59384](https://github.com/ClickHouse/ClickHouse/pull/59384) ([josh-hildred](https://github.com/josh-hildred)). +* Update libxml2 version to address some bogus security issues [#59386](https://github.com/ClickHouse/ClickHouse/pull/59386) ([josh-hildred](https://github.com/josh-hildred)). +* Update version after release [#59393](https://github.com/ClickHouse/ClickHouse/pull/59393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Job names [#59395](https://github.com/ClickHouse/ClickHouse/pull/59395) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: fix status and report for docker server jobs [#59396](https://github.com/ClickHouse/ClickHouse/pull/59396) ([Max K.](https://github.com/maxknv)). 
+* Update version_date.tsv and changelogs after v24.1.1.2048-stable [#59397](https://github.com/ClickHouse/ClickHouse/pull/59397) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Forward declaration for PeekableReadBuffer [#59399](https://github.com/ClickHouse/ClickHouse/pull/59399) ([Azat Khuzhin](https://github.com/azat)). +* Progress bar: use FQDN to differentiate metrics from different hosts [#59404](https://github.com/ClickHouse/ClickHouse/pull/59404) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix test test_stop_other_host_during_backup [#59432](https://github.com/ClickHouse/ClickHouse/pull/59432) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update run.sh [#59433](https://github.com/ClickHouse/ClickHouse/pull/59433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Post a failure status if can not run the CI [#59440](https://github.com/ClickHouse/ClickHouse/pull/59440) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Safer Rust (catch panic with catch_unwind()) [#59447](https://github.com/ClickHouse/ClickHouse/pull/59447) ([Azat Khuzhin](https://github.com/azat)). +* More parallel insert-select pipeline [#59448](https://github.com/ClickHouse/ClickHouse/pull/59448) ([Nikita Taranov](https://github.com/nickitat)). +* CLion says these headers are unused [#59451](https://github.com/ClickHouse/ClickHouse/pull/59451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)). +* Fix problem detected by UBSAN [#59461](https://github.com/ClickHouse/ClickHouse/pull/59461) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer: Fix denny_crane [#59483](https://github.com/ClickHouse/ClickHouse/pull/59483) ([vdimir](https://github.com/vdimir)). 
+* Fix `00191_aggregating_merge_tree_and_final` [#59494](https://github.com/ClickHouse/ClickHouse/pull/59494) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid running all checks when `aspell-dict.txt` was changed [#59496](https://github.com/ClickHouse/ClickHouse/pull/59496) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Fixes for binary.html [#59499](https://github.com/ClickHouse/ClickHouse/pull/59499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parallel replicas: better initial replicas failover (2) [#59501](https://github.com/ClickHouse/ClickHouse/pull/59501) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v24.1.2.5-stable [#59510](https://github.com/ClickHouse/ClickHouse/pull/59510) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.3.40-stable [#59511](https://github.com/ClickHouse/ClickHouse/pull/59511) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.11.5.29-stable [#59515](https://github.com/ClickHouse/ClickHouse/pull/59515) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update memory tracker periodically with cgroup memory usage [#59516](https://github.com/ClickHouse/ClickHouse/pull/59516) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove a scary message if an error is retryable [#59517](https://github.com/ClickHouse/ClickHouse/pull/59517) ([alesapin](https://github.com/alesapin)). +* Update the peter-evans/create-pull-request action to v6 [#59520](https://github.com/ClickHouse/ClickHouse/pull/59520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix usage of StatusType [#59527](https://github.com/ClickHouse/ClickHouse/pull/59527) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Analyzer fix test_select_access_rights/test_main.py::test_select_count [#59528](https://github.com/ClickHouse/ClickHouse/pull/59528) ([vdimir](https://github.com/vdimir)). +* GRPCServer: do not call value() on empty optional query_info [#59533](https://github.com/ClickHouse/ClickHouse/pull/59533) ([Sema Checherinda](https://github.com/CheSema)). +* Use ConnectionPoolPtr instead of raw pointer [#59534](https://github.com/ClickHouse/ClickHouse/pull/59534) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix data race with `AggregatedDataVariants` [#59537](https://github.com/ClickHouse/ClickHouse/pull/59537) ([Nikita Taranov](https://github.com/nickitat)). +* Refactoring of dashboard state encoding [#59554](https://github.com/ClickHouse/ClickHouse/pull/59554) ([Sergei Trifonov](https://github.com/serxa)). +* CI: ci_cache, enable await [#59555](https://github.com/ClickHouse/ClickHouse/pull/59555) ([Max K.](https://github.com/maxknv)). +* Bump libssh to 0.9.8 [#59563](https://github.com/ClickHouse/ClickHouse/pull/59563) ([Robert Schulze](https://github.com/rschu1ze)). +* MultiVersion use mutex [#59565](https://github.com/ClickHouse/ClickHouse/pull/59565) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix aws submodule reference [#59566](https://github.com/ClickHouse/ClickHouse/pull/59566) ([Raúl Marín](https://github.com/Algunenano)). +* Add missed #include and [#59567](https://github.com/ClickHouse/ClickHouse/pull/59567) ([Mikhnenko Sasha](https://github.com/4JustMe4)). +* CI: nightly job to update latest docker tag only [#59586](https://github.com/ClickHouse/ClickHouse/pull/59586) ([Max K.](https://github.com/maxknv)). +* Analyzer: compute ALIAS columns right after reading [#59595](https://github.com/ClickHouse/ClickHouse/pull/59595) ([vdimir](https://github.com/vdimir)). +* Add another sanity check for function return types [#59605](https://github.com/ClickHouse/ClickHouse/pull/59605) ([Raúl Marín](https://github.com/Algunenano)). 
+* Update README.md [#59610](https://github.com/ClickHouse/ClickHouse/pull/59610) ([Tyler Hannan](https://github.com/tylerhannan)). +* Updated a list of trusted contributors [#59616](https://github.com/ClickHouse/ClickHouse/pull/59616) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: fix ast fuzzer job report (slack bot issue) [#59629](https://github.com/ClickHouse/ClickHouse/pull/59629) ([Max K.](https://github.com/maxknv)). +* MergeTree FINAL optimization diagnostics and settings [#59650](https://github.com/ClickHouse/ClickHouse/pull/59650) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix default path when path is not specified in config [#59654](https://github.com/ClickHouse/ClickHouse/pull/59654) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow up for [#59277](https://github.com/ClickHouse/ClickHouse/issues/59277) [#59659](https://github.com/ClickHouse/ClickHouse/pull/59659) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)). +* Unquote FLAG_LATEST to fix issue with empty argument [#59672](https://github.com/ClickHouse/ClickHouse/pull/59672) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily remove a feature that doesn't work [#59688](https://github.com/ClickHouse/ClickHouse/pull/59688) ([Alexander Tokmakov](https://github.com/tavplubix)). +* ConnectionEstablisher: remove unused is_finished [#59706](https://github.com/ClickHouse/ClickHouse/pull/59706) ([Igor Nikonov](https://github.com/devcrafter)). +* Add test for increase-always autoscaling lambda [#59709](https://github.com/ClickHouse/ClickHouse/pull/59709) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove SourceWithKeyCondition from ReadFromStorageStep [#59720](https://github.com/ClickHouse/ClickHouse/pull/59720) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Make ZooKeeper actually sequentially consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add assertions around FixedString code [#59737](https://github.com/ClickHouse/ClickHouse/pull/59737) ([Raúl Marín](https://github.com/Algunenano)). +* Fix skipping unused shards with analyzer [#59741](https://github.com/ClickHouse/ClickHouse/pull/59741) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix DB type check - now it'll refuse to create in Replicated databases [#59743](https://github.com/ClickHouse/ClickHouse/pull/59743) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: Fix test_replicating_constants/test.py::test_different_versions [#59750](https://github.com/ClickHouse/ClickHouse/pull/59750) ([Dmitry Novik](https://github.com/novikd)). +* Fix dashboard params default values [#59753](https://github.com/ClickHouse/ClickHouse/pull/59753) ([Sergei Trifonov](https://github.com/serxa)). +* Fix logical optimizer with LowCardinality in new analyzer [#59766](https://github.com/ClickHouse/ClickHouse/pull/59766) ([Antonio Andelic](https://github.com/antonio2368)). +* Update libuv [#59773](https://github.com/ClickHouse/ClickHouse/pull/59773) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Followup [#50894](https://github.com/ClickHouse/ClickHouse/issues/50894) [#59774](https://github.com/ClickHouse/ClickHouse/pull/59774) ([Dmitry Novik](https://github.com/novikd)). +* CI: ci test await [#59778](https://github.com/ClickHouse/ClickHouse/pull/59778) ([Max K.](https://github.com/maxknv)). +* Better logging for adaptive async timeouts [#59781](https://github.com/ClickHouse/ClickHouse/pull/59781) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix broken youtube embedding in ne-tormozit.md [#59782](https://github.com/ClickHouse/ClickHouse/pull/59782) ([Shaun Struwig](https://github.com/Blargian)). 
+* Hide URL/S3 'headers' argument in SHOW CREATE [#59787](https://github.com/ClickHouse/ClickHouse/pull/59787) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.1.3.31-stable [#59799](https://github.com/ClickHouse/ClickHouse/pull/59799) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.12.4.15-stable [#59800](https://github.com/ClickHouse/ClickHouse/pull/59800) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: fix test_access_for_functions/test.py::test_access_rights_for_function [#59801](https://github.com/ClickHouse/ClickHouse/pull/59801) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_wrong_db_or_table_name/test.py::test_wrong_table_name [#59806](https://github.com/ClickHouse/ClickHouse/pull/59806) ([Dmitry Novik](https://github.com/novikd)). +* CI: await tune ups [#59807](https://github.com/ClickHouse/ClickHouse/pull/59807) ([Max K.](https://github.com/maxknv)). +* Enforce tests with enabled analyzer in CI [#59814](https://github.com/ClickHouse/ClickHouse/pull/59814) ([Dmitry Novik](https://github.com/novikd)). +* Handle different timestamp related aspects of zip-files [#59815](https://github.com/ClickHouse/ClickHouse/pull/59815) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix settings history azure_max_single_part_copy_size [#59819](https://github.com/ClickHouse/ClickHouse/pull/59819) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Updated a list of trusted contributors [#59844](https://github.com/ClickHouse/ClickHouse/pull/59844) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Add check for recursiveRemoveLowCardinality() [#59845](https://github.com/ClickHouse/ClickHouse/pull/59845) ([Vitaly Baranov](https://github.com/vitlibar)). +* Better warning for disabled kernel.task_delayacct [#59846](https://github.com/ClickHouse/ClickHouse/pull/59846) ([Azat Khuzhin](https://github.com/azat)). +* Reintroduce 02590_interserver_mode_client_info_initial_query_start_time [#59851](https://github.com/ClickHouse/ClickHouse/pull/59851) ([Azat Khuzhin](https://github.com/azat)). +* Respect CMAKE_OSX_DEPLOYMENT_TARGET for Rust targets [#59852](https://github.com/ClickHouse/ClickHouse/pull/59852) ([Azat Khuzhin](https://github.com/azat)). +* Do not reinitialize ZooKeeperWithFaultInjection on each chunk [#59854](https://github.com/ClickHouse/ClickHouse/pull/59854) ([Alexander Gololobov](https://github.com/davenger)). +* Fix: check if std::function is set before calling it [#59858](https://github.com/ClickHouse/ClickHouse/pull/59858) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix long shutdown of FileLog storage [#59873](https://github.com/ClickHouse/ClickHouse/pull/59873) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 02322_sql_insert_format flakiness [#59874](https://github.com/ClickHouse/ClickHouse/pull/59874) ([Azat Khuzhin](https://github.com/azat)). +* Follow up for [#58554](https://github.com/ClickHouse/ClickHouse/issues/58554). Cleanup. [#59889](https://github.com/ClickHouse/ClickHouse/pull/59889) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: Fix job failures due to jepsen artifacts [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)). +* Add test 02988_join_using_prewhere_pushdown [#59892](https://github.com/ClickHouse/ClickHouse/pull/59892) ([vdimir](https://github.com/vdimir)). +* Do not pull mutations if pulling replication log had been stopped [#59895](https://github.com/ClickHouse/ClickHouse/pull/59895) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix `02982_comments_in_system_tables` [#59896](https://github.com/ClickHouse/ClickHouse/pull/59896) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Refactor Squashing for inserts. [#59899](https://github.com/ClickHouse/ClickHouse/pull/59899) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not rebuild a lambda package if it is updated [#59902](https://github.com/ClickHouse/ClickHouse/pull/59902) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix self-extracting: macOS doesn't allow to run renamed executable - copy instead [#59906](https://github.com/ClickHouse/ClickHouse/pull/59906) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update tests with indexHint for analyzer. [#59907](https://github.com/ClickHouse/ClickHouse/pull/59907) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Petite cleanup around macros and ReplicatedMergeTree [#59909](https://github.com/ClickHouse/ClickHouse/pull/59909) ([Azat Khuzhin](https://github.com/azat)). +* Fix: absence of closing record in query_log for failed insert over http [#59910](https://github.com/ClickHouse/ClickHouse/pull/59910) ([Igor Nikonov](https://github.com/devcrafter)). +* Decrease logging level for http retriable errors to Warning (and fix 00157_cache_dictionary flakiness) [#59920](https://github.com/ClickHouse/ClickHouse/pull/59920) ([Azat Khuzhin](https://github.com/azat)). +* Remove `test_distributed_backward_compatability` [#59921](https://github.com/ClickHouse/ClickHouse/pull/59921) ([Dmitry Novik](https://github.com/novikd)). +* Commands node args should add rvalue to push_back to reduce object copy cost [#59922](https://github.com/ClickHouse/ClickHouse/pull/59922) ([xuzifu666](https://github.com/xuzifu666)). +* tests: fix 02981_vertical_merges_memory_usage flakiness [#59923](https://github.com/ClickHouse/ClickHouse/pull/59923) ([Azat Khuzhin](https://github.com/azat)). 
+* Analyzer: Update broken integration tests list [#59924](https://github.com/ClickHouse/ClickHouse/pull/59924) ([Dmitry Novik](https://github.com/novikd)). +* CI: integration tests to mysql80 [#59939](https://github.com/ClickHouse/ClickHouse/pull/59939) ([Max K.](https://github.com/maxknv)). +* Register StorageMergeTree exception message fix [#59941](https://github.com/ClickHouse/ClickHouse/pull/59941) ([Maksim Kita](https://github.com/kitaisreal)). +* Replace lambdas with pointers to members to simplify stacks [#59944](https://github.com/ClickHouse/ClickHouse/pull/59944) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer: Fix test_user_defined_object_persistence [#59948](https://github.com/ClickHouse/ClickHouse/pull/59948) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_mutations_with_merge_tree [#59951](https://github.com/ClickHouse/ClickHouse/pull/59951) ([Dmitry Novik](https://github.com/novikd)). +* Cleanups [#59964](https://github.com/ClickHouse/ClickHouse/pull/59964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v24.1.4.19-stable [#59966](https://github.com/ClickHouse/ClickHouse/pull/59966) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Less conflicts [#59968](https://github.com/ClickHouse/ClickHouse/pull/59968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We don't have external dictionaries from Aerospike [#59969](https://github.com/ClickHouse/ClickHouse/pull/59969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix max num to warn message [#59972](https://github.com/ClickHouse/ClickHouse/pull/59972) ([Jordi Villar](https://github.com/jrdi)). +* Analyzer: Fix test_settings_profile [#59975](https://github.com/ClickHouse/ClickHouse/pull/59975) ([Dmitry Novik](https://github.com/novikd)). 
+* Update version_date.tsv and changelogs after v24.1.4.20-stable [#59978](https://github.com/ClickHouse/ClickHouse/pull/59978) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: Fix test_storage_rabbitmq [#59981](https://github.com/ClickHouse/ClickHouse/pull/59981) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: Fix test_shard_level_const_function [#59983](https://github.com/ClickHouse/ClickHouse/pull/59983) ([Dmitry Novik](https://github.com/novikd)). +* Add newlines to SettingsChangesHistory to maybe have less conflicts [#59984](https://github.com/ClickHouse/ClickHouse/pull/59984) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove context from comparison functions. [#59985](https://github.com/ClickHouse/ClickHouse/pull/59985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v24.1.5.6-stable [#59993](https://github.com/ClickHouse/ClickHouse/pull/59993) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Insert synchronously if dependent MV deduplication is enabled" [#59998](https://github.com/ClickHouse/ClickHouse/pull/59998) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix obviously wrong (but non significant) error in dictionaries [#60005](https://github.com/ClickHouse/ClickHouse/pull/60005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Inhibit randomization in some tests [#60009](https://github.com/ClickHouse/ClickHouse/pull/60009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The code should not be complex [#60010](https://github.com/ClickHouse/ClickHouse/pull/60010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Exclude test run from a slow build [#60011](https://github.com/ClickHouse/ClickHouse/pull/60011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix broken lambdas formatting [#60012](https://github.com/ClickHouse/ClickHouse/pull/60012) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Verify formatting consistency on the server-side [#60013](https://github.com/ClickHouse/ClickHouse/pull/60013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix test_sql_user_defined_functions_on_cluster [#60019](https://github.com/ClickHouse/ClickHouse/pull/60019) ([Dmitry Novik](https://github.com/novikd)). +* Fix 02981_vertical_merges_memory_usage with SharedMergeTree [#60028](https://github.com/ClickHouse/ClickHouse/pull/60028) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 01656_test_query_log_factories_info with analyzer. [#60037](https://github.com/ClickHouse/ClickHouse/pull/60037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable tests with coverage [#60047](https://github.com/ClickHouse/ClickHouse/pull/60047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print CPU flags at startup [#60075](https://github.com/ClickHouse/ClickHouse/pull/60075) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup: less confusion between config priority and balancing priority in connection pools [#60077](https://github.com/ClickHouse/ClickHouse/pull/60077) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporary table already exists exception message fix [#60080](https://github.com/ClickHouse/ClickHouse/pull/60080) ([Maksim Kita](https://github.com/kitaisreal)). +* Refactor prewhere and primary key optimization [#60082](https://github.com/ClickHouse/ClickHouse/pull/60082) ([Amos Bird](https://github.com/amosbird)). 
+* Bump curl to version 4.6.0 [#60084](https://github.com/ClickHouse/ClickHouse/pull/60084) ([josh-hildred](https://github.com/josh-hildred)). +* Check wrong abbreviations [#60086](https://github.com/ClickHouse/ClickHouse/pull/60086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the check for formatting consistency from the Fuzzer [#60088](https://github.com/ClickHouse/ClickHouse/pull/60088) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid overflow in settings [#60089](https://github.com/ClickHouse/ClickHouse/pull/60089) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A small preparation for better handling of primary key in memory [#60092](https://github.com/ClickHouse/ClickHouse/pull/60092) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move threadPoolCallbackRunner to the "Common" folder [#60097](https://github.com/ClickHouse/ClickHouse/pull/60097) ([Vitaly Baranov](https://github.com/vitlibar)). +* Speed up the CI [#60106](https://github.com/ClickHouse/ClickHouse/pull/60106) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Insignificant changes [#60108](https://github.com/ClickHouse/ClickHouse/pull/60108) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not retry queries if container is down in integration tests [#60109](https://github.com/ClickHouse/ClickHouse/pull/60109) ([Azat Khuzhin](https://github.com/azat)). +* Better check for inconsistent formatting [#60110](https://github.com/ClickHouse/ClickHouse/pull/60110) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* skip printing meaningless log [#60123](https://github.com/ClickHouse/ClickHouse/pull/60123) ([conic](https://github.com/conicl)). +* Implement TODO [#60124](https://github.com/ClickHouse/ClickHouse/pull/60124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix bad log message [#60125](https://github.com/ClickHouse/ClickHouse/pull/60125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `IMergeTreeDataPart` [#60139](https://github.com/ClickHouse/ClickHouse/pull/60139) ([Antonio Andelic](https://github.com/antonio2368)). +* Add new setting to changes history [#60141](https://github.com/ClickHouse/ClickHouse/pull/60141) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer: fix row level filters with PREWHERE + additional filters [#60142](https://github.com/ClickHouse/ClickHouse/pull/60142) ([vdimir](https://github.com/vdimir)). +* Tests: query log for inserts over http [#60143](https://github.com/ClickHouse/ClickHouse/pull/60143) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix build in master [#60151](https://github.com/ClickHouse/ClickHouse/pull/60151) ([Raúl Marín](https://github.com/Algunenano)). +* Add setting history check to stateless tests [#60154](https://github.com/ClickHouse/ClickHouse/pull/60154) ([Raúl Marín](https://github.com/Algunenano)). +* Mini cleanup of CPUID.h [#60155](https://github.com/ClickHouse/ClickHouse/pull/60155) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix: custom key failover test flakiness [#60158](https://github.com/ClickHouse/ClickHouse/pull/60158) ([Igor Nikonov](https://github.com/devcrafter)). +* Skip sanity checks on secondary CREATE query [#60159](https://github.com/ClickHouse/ClickHouse/pull/60159) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove extensively aggressive check [#60162](https://github.com/ClickHouse/ClickHouse/pull/60162) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong message during compilation [#60178](https://github.com/ClickHouse/ClickHouse/pull/60178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add a test for [#44318](https://github.com/ClickHouse/ClickHouse/issues/44318) [#60179](https://github.com/ClickHouse/ClickHouse/pull/60179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test for 59437 [#60191](https://github.com/ClickHouse/ClickHouse/pull/60191) ([Raúl Marín](https://github.com/Algunenano)). +* CI: hot fix for gh statuses [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)). +* Limit libarchive format to what we use [#60203](https://github.com/ClickHouse/ClickHouse/pull/60203) ([San](https://github.com/santrancisco)). +* Fix bucket region discovery [#60204](https://github.com/ClickHouse/ClickHouse/pull/60204) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `test_backup_restore_s3/test.py::test_user_specific_auth` [#60210](https://github.com/ClickHouse/ClickHouse/pull/60210) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: combine analyzer, s3, dbreplicated into one job [#60224](https://github.com/ClickHouse/ClickHouse/pull/60224) ([Max K.](https://github.com/maxknv)). +* Slightly better Keeper loading from snapshot [#60226](https://github.com/ClickHouse/ClickHouse/pull/60226) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: IAST::clone() for RENAME [#60227](https://github.com/ClickHouse/ClickHouse/pull/60227) ([Igor Nikonov](https://github.com/devcrafter)). +* Treat 2+ in allow_experimental_parallel_reading_from_replicas as 2 [#60228](https://github.com/ClickHouse/ClickHouse/pull/60228) ([Raúl Marín](https://github.com/Algunenano)). +* CI: random job pick support [#60229](https://github.com/ClickHouse/ClickHouse/pull/60229) ([Max K.](https://github.com/maxknv)). +* Fix analyzer - hide arguments for secret functions [#60230](https://github.com/ClickHouse/ClickHouse/pull/60230) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
+* Backups delete suspicious file [#60231](https://github.com/ClickHouse/ClickHouse/pull/60231) ([Maksim Kita](https://github.com/kitaisreal)). +* CI: random sanitizer for parallel repl in PR wf [#60234](https://github.com/ClickHouse/ClickHouse/pull/60234) ([Max K.](https://github.com/maxknv)). +* CI: use aarch runner for runconfig job [#60236](https://github.com/ClickHouse/ClickHouse/pull/60236) ([Max K.](https://github.com/maxknv)). +* Add test for 60232 [#60244](https://github.com/ClickHouse/ClickHouse/pull/60244) ([Raúl Marín](https://github.com/Algunenano)). +* Make cloud sync required [#60245](https://github.com/ClickHouse/ClickHouse/pull/60245) ([Raúl Marín](https://github.com/Algunenano)). +* Tests from [#60094](https://github.com/ClickHouse/ClickHouse/issues/60094) [#60256](https://github.com/ClickHouse/ClickHouse/pull/60256) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad check in Keeper [#60266](https://github.com/ClickHouse/ClickHouse/pull/60266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix integration `test_backup_restore_s3` [#60269](https://github.com/ClickHouse/ClickHouse/pull/60269) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore valid 'No such key errors' in stress tests [#60270](https://github.com/ClickHouse/ClickHouse/pull/60270) ([Raúl Marín](https://github.com/Algunenano)). +* Stress test: Include the first sanitizer block message in the report [#60283](https://github.com/ClickHouse/ClickHouse/pull/60283) ([Raúl Marín](https://github.com/Algunenano)). +* Update analyzer_tech_debt.txt [#60303](https://github.com/ClickHouse/ClickHouse/pull/60303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Minor fixes for hashed dictionary [#60310](https://github.com/ClickHouse/ClickHouse/pull/60310) ([vdimir](https://github.com/vdimir)). +* Install tailscale during AMI build and set it up on runners [#60316](https://github.com/ClickHouse/ClickHouse/pull/60316) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* CI: remove Integration tests asan and release from PR wf [#60327](https://github.com/ClickHouse/ClickHouse/pull/60327) ([Max K.](https://github.com/maxknv)). +* Fix - analyzer related - "executable" function subquery arguments. [#60339](https://github.com/ClickHouse/ClickHouse/pull/60339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update settings.md to correct the description for setting `max_concurrent_queries_for_user` [#60343](https://github.com/ClickHouse/ClickHouse/pull/60343) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Fix rapidjson submodule [#60346](https://github.com/ClickHouse/ClickHouse/pull/60346) ([Raúl Marín](https://github.com/Algunenano)). +* Validate experimental and suspicious types inside nested types under a setting [#60353](https://github.com/ClickHouse/ClickHouse/pull/60353) ([Kruglov Pavel](https://github.com/Avogar)). +* Update 01158_zookeeper_log_long.sql [#60357](https://github.com/ClickHouse/ClickHouse/pull/60357) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add missed #include [#60358](https://github.com/ClickHouse/ClickHouse/pull/60358) ([Mikhnenko Sasha](https://github.com/4JustMe4)). +* Follow up [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082) [#60360](https://github.com/ClickHouse/ClickHouse/pull/60360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove ALTER LIVE VIEW [#60370](https://github.com/ClickHouse/ClickHouse/pull/60370) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Expose fatal.log separately for fuzzer [#60374](https://github.com/ClickHouse/ClickHouse/pull/60374) ([Azat Khuzhin](https://github.com/azat)). +* Minor changes for dashboard [#60387](https://github.com/ClickHouse/ClickHouse/pull/60387) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove unused method [#60388](https://github.com/ClickHouse/ClickHouse/pull/60388) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to map UI handlers to different paths [#60389](https://github.com/ClickHouse/ClickHouse/pull/60389) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove old tags from integration tests [#60407](https://github.com/ClickHouse/ClickHouse/pull/60407) ([Raúl Marín](https://github.com/Algunenano)). +* Update `liburing` to 2.5 [#60409](https://github.com/ClickHouse/ClickHouse/pull/60409) ([Nikita Taranov](https://github.com/nickitat)). +* Fix undefined-behavior in case of too big max_execution_time setting [#60419](https://github.com/ClickHouse/ClickHouse/pull/60419) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong log message in Fuzzer [#60425](https://github.com/ClickHouse/ClickHouse/pull/60425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unrestricted reads from keeper [#60429](https://github.com/ClickHouse/ClickHouse/pull/60429) ([Raúl Marín](https://github.com/Algunenano)). +* Split update_mergeable_check into two functions to force trigger the status [#60431](https://github.com/ClickHouse/ClickHouse/pull/60431) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Revert "Add table function `mergeTreeIndex`"" [#60435](https://github.com/ClickHouse/ClickHouse/pull/60435) ([Anton Popov](https://github.com/CurtizJ)). +* Revert "Merge pull request [#56864](https://github.com/ClickHouse/ClickHouse/issues/56864) from ClickHouse/broken-projections-better-handling" [#60436](https://github.com/ClickHouse/ClickHouse/pull/60436) ([Nikita Taranov](https://github.com/nickitat)). +* Keeper: fix moving changelog files between disks [#60442](https://github.com/ClickHouse/ClickHouse/pull/60442) ([Antonio Andelic](https://github.com/antonio2368)). +* Replace deprecated distutils by vendored packaging [#60444](https://github.com/ClickHouse/ClickHouse/pull/60444) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Do not fail the build if ci-logs is not healthy [#60445](https://github.com/ClickHouse/ClickHouse/pull/60445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move setting `enable_order_by_all` out of the experimental setting section [#60449](https://github.com/ClickHouse/ClickHouse/pull/60449) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace `boost::algorithm::starts_with()` by `std::string::starts_with()` [#60450](https://github.com/ClickHouse/ClickHouse/pull/60450) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor: Replace boost::algorithm::ends_with() by std::string::ends_with() [#60454](https://github.com/ClickHouse/ClickHouse/pull/60454) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: remove input params for job scripts [#60455](https://github.com/ClickHouse/ClickHouse/pull/60455) ([Max K.](https://github.com/maxknv)). +* Fix: 02496_remove_redundant_sorting_analyzer [#60456](https://github.com/ClickHouse/ClickHouse/pull/60456) ([Igor Nikonov](https://github.com/devcrafter)). +* PR template fix to include ci fix category [#60461](https://github.com/ClickHouse/ClickHouse/pull/60461) ([Max K.](https://github.com/maxknv)). +* Reduce iterations in 01383_log_broken_table [#60465](https://github.com/ClickHouse/ClickHouse/pull/60465) ([Raúl Marín](https://github.com/Algunenano)). +* Merge [#57434](https://github.com/ClickHouse/ClickHouse/issues/57434) [#60466](https://github.com/ClickHouse/ClickHouse/pull/60466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test: looks like an obvious race condition, but I didn't check in detail. [#60471](https://github.com/ClickHouse/ClickHouse/pull/60471) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make test slower [#60472](https://github.com/ClickHouse/ClickHouse/pull/60472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix cgroups v1 rss parsing in CgroupsMemoryUsageObserver [#60481](https://github.com/ClickHouse/ClickHouse/pull/60481) ([Maksim Kita](https://github.com/kitaisreal)). +* CI: fix pr check status to not fail mergeable check [#60483](https://github.com/ClickHouse/ClickHouse/pull/60483) ([Max K.](https://github.com/maxknv)). +* Report respects skipped builds [#60488](https://github.com/ClickHouse/ClickHouse/pull/60488) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: quick style fix [#60490](https://github.com/ClickHouse/ClickHouse/pull/60490) ([Max K.](https://github.com/maxknv)). +* Decrease logging level for http retriable errors to Info [#60508](https://github.com/ClickHouse/ClickHouse/pull/60508) ([Raúl Marín](https://github.com/Algunenano)). +* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/docs/en/development/build.md b/docs/en/development/build.md index b474c445604..5cbf851b785 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -14,20 +14,6 @@ Supported platforms: - PowerPC 64 LE (experimental) - RISC-V 64 (experimental) -## Building in docker -We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. 
There is a script `docker/packager/packager` to ease the image usage: - -```bash -# define a directory for the output artifacts -output_dir="build_results" -# a simplest build -./docker/packager/packager --package-type=binary --output-dir "$output_dir" -# build debian packages -./docker/packager/packager --package-type=deb --output-dir "$output_dir" -# by default, debian packages use thin LTO, so we can override it to speed up the build -CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" -``` - ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. @@ -37,6 +23,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS. ### Install Prerequisites {#install-prerequisites} ``` bash +sudo apt-get update sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg ``` @@ -57,7 +44,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). -As of August 2023, clang-16 or higher will work. +As of March 2024, clang-17 or higher will work. GCC as a compiler is not supported. To build with a specific Clang version: @@ -67,8 +54,8 @@ to see what version you have installed before setting this environment variable. ::: ``` bash -export CC=clang-17 -export CXX=clang++-17 +export CC=clang-18 +export CXX=clang++-18 ``` ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} @@ -133,3 +120,17 @@ mkdir build cmake -S . -B build cmake --build build ``` + +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. 
There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results" +``` diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 003277c8d4f..84251812c01 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -275,6 +275,16 @@ Cache profile events: - `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds` +## Using in-memory cache (userspace page cache) {#userspace-page-cache} + +The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files. + +To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`. + +By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). 
This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`. + +Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled. + ## Storing Data on Web Server {#storing-data-on-webserver} There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. 
diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index 92cd104e06e..b79c02ab780 100644 --- a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -1,4 +1,4 @@ ---- +--- slug: /zh/getting-started/example-datasets/opensky sidebar_label: 空中交通数据 description: 该数据集中的数据是从完整的 OpenSky 数据集中衍生而来的,对其中的数据进行了必要的清理,用以展示在 COVID-19 期间空中交通的发展。 @@ -53,12 +53,12 @@ CREATE TABLE opensky ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' ``` -- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处理。 `xargs -P100` 指定最多使用 100 个并行工作程序,但由于我们只有 30 个文件,工作程序的数量将只有 30 个。 -- 对于每个文件,`xargs` 将通过 `bash -c` 为每个文件运行一个脚本文件。该脚本通过使用 `{}` 表示文件名占位符,然后 `xargs` 由命令进行填充(使用 `-I{}`)。 -- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` 参数),并将输出重定向到 `clickhouse-client`。 -- 我们还要求使用扩展解析器解析 [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区偏移的 ISO-8601 格式。 +- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处理。 `xargs -P100` 指定最多使用 100 个并行工作程序,但由于我们只有 30 个文件,工作程序的数量将只有 30 个。 +- 对于每个文件,`xargs` 将通过 `bash -c` 为每个文件运行一个脚本文件。该脚本通过使用 `{}` 表示文件名占位符,然后 `xargs` 由命令进行填充(使用 `-I{}`)。 +- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` 参数),并将输出重定向到 `clickhouse-client`。 +- 我们还要求使用扩展解析器解析 [DateTime](/docs/zh/sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format)) 以识别具有时区偏移的 ISO-8601 格式。 -最后,`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) 格式读取输入数据然后执行插入。 +最后,`clickhouse-client` 会以 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) 格式读取输入数据然后执行插入。 并行导入需要 24 秒。 diff --git a/programs/benchmark/Benchmark.cpp 
b/programs/benchmark/Benchmark.cpp index fac88c0621f..45dadfef774 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -567,10 +567,6 @@ public: } -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - int mainEntryClickHouseBenchmark(int argc, char ** argv) { using namespace DB; diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 649a64b9de4..a2bd6b6016a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -51,10 +51,6 @@ #include #include -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - namespace fs = std::filesystem; using namespace std::literals; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a10f47be0b8..93562d6df90 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1228,6 +1228,13 @@ try } global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); + size_t page_cache_size = server_settings.page_cache_size; + if (page_cache_size != 0) + global_context->setPageCache( + server_settings.page_cache_chunk_size, server_settings.page_cache_mmap_size, + page_cache_size, server_settings.page_cache_use_madv_free, + server_settings.page_cache_use_transparent_huge_pages); + String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy; size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio; diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs index f4253cef08f..2e5b2061fcb 100644 --- a/rust/prql/src/lib.rs +++ b/rust/prql/src/lib.rs @@ -14,44 +14,41 @@ fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) { *out_ptr = CString::new(result).unwrap().into_raw() as *mut u8; } +/// Converts a PRQL query from a raw C string to SQL, returning an error code if the conversion fails. 
pub unsafe extern "C" fn prql_to_sql_impl( query: *const u8, size: u64, out: *mut *mut u8, out_size: *mut u64, ) -> i64 { - let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec(); - let maybe_prql_query = String::from_utf8(query_vec); - if maybe_prql_query.is_err() { + let query_vec = slice::from_raw_parts(query, size.try_into().unwrap()).to_vec(); + let Ok(query_str) = String::from_utf8(query_vec) else { set_output( - String::from("The PRQL query must be UTF-8 encoded!"), + "The PRQL query must be UTF-8 encoded!".to_string(), out, out_size, ); return 1; - } - let prql_query = maybe_prql_query.unwrap(); - let opts = &Options { + }; + + let opts = Options { format: true, target: Target::Sql(Some(Dialect::ClickHouse)), signature_comment: false, color: false, }; - let (is_err, res) = match prqlc::compile(&prql_query, &opts) { - Ok(sql_str) => (false, sql_str), - Err(err) => (true, err.to_string()), - }; - // NOTE: Over at PRQL we're considering to un-deprecate & re-enable the - // `color: false` option. If that happens, we can remove the `strip_str` - // here, which strips the output of color codes. - use anstream::adapter::strip_str; - - set_output(strip_str(&res).to_string(), out, out_size); - - match is_err { - true => 1, - false => 0, + if let Ok(sql_str) = prqlc::compile(&query_str, &opts) { + // NOTE: Over at PRQL we're considering to un-deprecate & re-enable the + // `color: false` option. If that happens, we can remove the `strip_str` + // here, which strips color codes from the output. 
+ use anstream::adapter::strip_str; + let sql_str = strip_str(&sql_str).to_string(); + set_output(sql_str, out, out_size); + 0 + } else { + set_output("PRQL compilation failed!".to_string(), out, out_size); + 1 } } diff --git a/rust/skim/src/lib.rs b/rust/skim/src/lib.rs index a20b1b35033..58d5be51baa 100644 --- a/rust/skim/src/lib.rs +++ b/rust/skim/src/lib.rs @@ -1,7 +1,7 @@ -use skim::prelude::*; -use term::terminfo::TermInfo; use cxx::{CxxString, CxxVector}; +use skim::prelude::*; use std::panic; +use term::terminfo::TermInfo; #[cxx::bridge] mod ffi { @@ -16,7 +16,7 @@ struct Item { } impl Item { fn new(text: String) -> Self { - return Self{ + Self { // Text that will be printed by skim, and will be used for matching. // // Text that will be shown should not contains new lines since in this case skim may @@ -24,16 +24,16 @@ impl Item { text_no_newlines: text.replace("\n", " "), // This will be used when the match had been selected. orig_text: text, - }; + } } } impl SkimItem for Item { fn text(&self) -> Cow { - return Cow::Borrowed(&self.text_no_newlines); + Cow::Borrowed(&self.text_no_newlines) } fn output(&self) -> Cow { - return Cow::Borrowed(&self.orig_text); + Cow::Borrowed(&self.orig_text) } } @@ -88,14 +88,11 @@ fn skim_impl(prefix: &CxxString, words: &CxxVector) -> Result) -> Result { - let ret = panic::catch_unwind(|| { - return skim_impl(prefix, words); - }); - return match ret { + match panic::catch_unwind(|| skim_impl(prefix, words)) { Err(err) => { let e = if let Some(s) = err.downcast_ref::() { format!("{}", s) @@ -105,7 +102,7 @@ fn skim(prefix: &CxxString, words: &CxxVector) -> Result res, } } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index ea3fb123b38..87f96ca48be 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -163,6 +163,7 @@ enum class AccessType M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ 
M(SYSTEM_DROP_DISTRIBUTED_CACHE, "SYSTEM DROP DISTRIBUTED CACHE, DROP DISTRIBUTED CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \ + M(SYSTEM_DROP_PAGE_CACHE, "SYSTEM DROP PAGE CACHE, DROP PAGE CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Columns/Collator.cpp b/src/Columns/Collator.cpp index 434a30c0450..f6a3bb40d25 100644 --- a/src/Columns/Collator.cpp +++ b/src/Columns/Collator.cpp @@ -8,10 +8,8 @@ # include # include #else -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wunused-private-field" -# pragma clang diagnostic ignored "-Wmissing-noreturn" -# endif +# pragma clang diagnostic ignored "-Wunused-private-field" +# pragma clang diagnostic ignored "-Wmissing-noreturn" #endif #include diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ca00f2fd513..f7482d44b66 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -379,7 +379,6 @@ M(467, CANNOT_PARSE_BOOL) \ M(468, CANNOT_PTHREAD_ATTR) \ M(469, VIOLATED_CONSTRAINT) \ - M(470, QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW) \ M(471, INVALID_SETTING_VALUE) \ M(472, READONLY_SETTING) \ M(473, DEADLOCK_AVOIDED) \ diff --git a/src/Common/FailPoint.h b/src/Common/FailPoint.h index 613cfb15322..b3e1214d597 100644 --- a/src/Common/FailPoint.h +++ b/src/Common/FailPoint.h @@ -5,18 +5,14 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#endif #include #include -#ifdef __clang__ #pragma 
clang diagnostic pop -#endif #include diff --git a/src/Common/MatchGenerator.cpp b/src/Common/MatchGenerator.cpp index f047c21b470..9078a5d181f 100644 --- a/src/Common/MatchGenerator.cpp +++ b/src/Common/MatchGenerator.cpp @@ -1,18 +1,14 @@ -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -# pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -# pragma clang diagnostic ignored "-Wnested-anon-types" -# pragma clang diagnostic ignored "-Wunused-parameter" -# pragma clang diagnostic ignored "-Wshadow-field-in-constructor" -# pragma clang diagnostic ignored "-Wdtor-name" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wshadow-field-in-constructor" +#pragma clang diagnostic ignored "-Wdtor-name" #include #include #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop #ifdef LOG_INFO #undef LOG_INFO diff --git a/src/Common/MemorySanitizer.h b/src/Common/MemorySanitizer.h index bd44ff62acb..5d72e0b8f73 100644 --- a/src/Common/MemorySanitizer.h +++ b/src/Common/MemorySanitizer.h @@ -2,10 +2,8 @@ #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-id-macro" -#endif #undef __msan_unpoison #undef __msan_test_shadow @@ -32,6 +30,4 @@ # endif #endif -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Common/NetlinkMetricsProvider.cpp b/src/Common/NetlinkMetricsProvider.cpp index 6969b5b7542..172fede525a 100644 --- a/src/Common/NetlinkMetricsProvider.cpp +++ b/src/Common/NetlinkMetricsProvider.cpp @@ -22,10 +22,8 @@ #include #include -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" - #pragma clang 
diagnostic ignored "-Wnested-anon-types" -#endif +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" /// Basic idea is motivated by "iotop" tool. /// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp new file mode 100644 index 00000000000..511ec23d431 --- /dev/null +++ b/src/Common/PageCache.cpp @@ -0,0 +1,688 @@ +#include "PageCache.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event PageCacheChunkMisses; + extern const Event PageCacheChunkShared; + extern const Event PageCacheChunkDataHits; + extern const Event PageCacheChunkDataPartialHits; + extern const Event PageCacheChunkDataMisses; + extern const Event PageCacheBytesUnpinnedRoundedToPages; + extern const Event PageCacheBytesUnpinnedRoundedToHugePages; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYSTEM_ERROR; + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int INVALID_SETTING_VALUE; + extern const int FILE_DOESNT_EXIST; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma clang diagnostic ignored "-Wreadability-make-member-function-const" + +PinnedPageChunk::PinnedPageChunk(PinnedPageChunk && c) noexcept + : cache(std::exchange(c.cache, nullptr)), chunk(std::exchange(c.chunk, nullptr)) {} + +PinnedPageChunk & PinnedPageChunk::operator=(PinnedPageChunk && c) noexcept +{ + if (cache) + cache->removeRef(chunk); + cache = std::exchange(c.cache, nullptr); + chunk = std::exchange(c.chunk, nullptr); + return *this; +} + +PinnedPageChunk::~PinnedPageChunk() noexcept +{ + if (cache) + cache->removeRef(chunk); +} + +PinnedPageChunk::PinnedPageChunk(PageCache * cache_, PageChunk * chunk_) noexcept : cache(cache_), chunk(chunk_) 
{} + +const PageChunk * PinnedPageChunk::getChunk() const { return chunk; } + +bool PinnedPageChunk::markPagePopulated(size_t page_idx) +{ + bool r = chunk->pages_populated.set(page_idx); + return r; +} + +void PinnedPageChunk::markPrefixPopulated(size_t bytes) +{ + for (size_t i = 0; i < (bytes + chunk->page_size - 1) / chunk->page_size; ++i) + markPagePopulated(i); +} + +bool PinnedPageChunk::isPrefixPopulated(size_t bytes) const +{ + for (size_t i = 0; i < (bytes + chunk->page_size - 1) / chunk->page_size; ++i) + if (!chunk->pages_populated.get(i)) + return false; + return true; +} + +AtomicBitSet::AtomicBitSet() = default; + +void AtomicBitSet::init(size_t nn) +{ + n = nn; + v = std::make_unique[]>((n + 7) / 8); +} + +bool AtomicBitSet::get(size_t i) const +{ + return (v[i / 8] & (1 << (i % 8))) != 0; +} + +bool AtomicBitSet::any() const +{ + for (size_t i = 0; i < (n + 7) / 8; ++i) + if (v[i]) + return true; + return false; +} + +bool AtomicBitSet::set(size_t i) const +{ + UInt8 prev = v[i / 8].fetch_or(1 << (i % 8)); + return (prev & (1 << (i % 8))) == 0; +} + +bool AtomicBitSet::set(size_t i, bool val) const +{ + if (val) + return set(i); + else + return unset(i); +} + +bool AtomicBitSet::unset(size_t i) const +{ + UInt8 prev = v[i / 8].fetch_and(~(1 << (i % 8))); + return (prev & (1 << (i % 8))) != 0; +} + +void AtomicBitSet::unsetAll() const +{ + for (size_t i = 0; i < (n + 7) / 8; ++i) + v[i].store(0, std::memory_order_relaxed); +} + +PageCache::PageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free_, bool use_huge_pages_) + : bytes_per_page(getPageSize()) + , use_madv_free(use_madv_free_) + , use_huge_pages(use_huge_pages_) + , rng(randomSeed()) +{ + if (bytes_per_chunk == 0 || bytes_per_mmap == 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Userspace page cache chunk size and mmap size can't be zero."); + + if (use_huge_pages) + { + use_huge_pages = false; + bool print_warning = false; +#ifdef OS_LINUX 
+ try + { + ReadBufferFromFile in("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); + size_t huge_page_size; + readIntText(huge_page_size, in); + + if (huge_page_size == 0 || huge_page_size % bytes_per_page != 0) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Invalid huge page size reported by the OS: {}", huge_page_size); + + /// THP can be configured to be 2 MiB or 1 GiB in size. 1 GiB is way too big for us. + if (huge_page_size <= (16 << 20)) + { + pages_per_big_page = huge_page_size / bytes_per_page; + use_huge_pages = true; + } + else + { + LOG_WARNING(&Poco::Logger::get("PageCache"), "The OS huge page size is too large for our purposes: {} KiB. Using regular pages. Userspace page cache will be relatively slow.", huge_page_size); + } + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) + throw; + print_warning = true; + } +#else + print_warning = true; +#endif + if (print_warning) + LOG_WARNING(&Poco::Logger::get("PageCache"), "The OS doesn't support transparent huge pages. 
Userspace page cache will be relatively slow."); + } + + pages_per_chunk = ((bytes_per_chunk - 1) / (bytes_per_page * pages_per_big_page) + 1) * pages_per_big_page; + chunks_per_mmap_target = (bytes_per_mmap - 1) / (bytes_per_page * pages_per_chunk) + 1; + max_mmaps = (bytes_total - 1) / (bytes_per_page * pages_per_chunk * chunks_per_mmap_target) + 1; +} + +PageCache::~PageCache() +{ + chassert(getPinnedSize() == 0); +} + +size_t PageCache::pageSize() const { return bytes_per_page; } +size_t PageCache::chunkSize() const { return bytes_per_page * pages_per_chunk; } +size_t PageCache::maxChunks() const { return chunks_per_mmap_target * max_mmaps; } + +size_t PageCache::getPinnedSize() const +{ + std::unique_lock lock(global_mutex); + return (total_chunks - lru.size()) * bytes_per_page * pages_per_chunk; +} + +PageCache::MemoryStats PageCache::getResidentSetSize() const +{ + MemoryStats stats; +#ifdef OS_LINUX + if (use_madv_free) + { + std::unordered_set cache_mmap_addrs; + for (const auto & m : mmaps) + cache_mmap_addrs.insert(reinterpret_cast(m.ptr)); + + ReadBufferFromFile in("/proc/self/smaps"); + + /// Parse the smaps contents, which is text consisting of entries like this: + /// + /// 117ba4a00000-117be4a00000 rw-p 00000000 00:00 0 + /// Size: 1048576 kB + /// KernelPageSize: 4 kB + /// MMUPageSize: 4 kB + /// Rss: 539516 kB + /// Pss: 539516 kB + /// ... 
+ + auto read_token = [&] + { + String res; + while (!in.eof()) + { + char c = *in.position(); + if (c == '\n' || c == '\t' || c == ' ' || c == '-') + break; + res += c; + ++in.position(); + } + return res; + }; + + auto skip_whitespace = [&] + { + while (!in.eof()) + { + char c = *in.position(); + if (c != ' ' && c != '\t') + break; + ++in.position(); + } + }; + + bool current_range_is_cache = false; + size_t total_rss = 0; + size_t total_lazy_free = 0; + while (!in.eof()) + { + String s = read_token(); + if (!in.eof() && *in.position() == '-') + { + if (s.size() < 16) + s.insert(0, 16 - s.size(), '0'); + UInt64 addr = unhexUInt(s.c_str()); + current_range_is_cache = cache_mmap_addrs.contains(addr); + } + else if (s == "Rss:" || s == "LazyFree") + { + skip_whitespace(); + size_t val; + readIntText(val, in); + skip_whitespace(); + String unit = read_token(); + if (unit != "kB") + throw Exception(ErrorCodes::SYSTEM_ERROR, "Unexpected units in /proc/self/smaps: {}", unit); + size_t bytes = val * 1024; + + if (s == "Rss:") + { + total_rss += bytes; + if (current_range_is_cache) + stats.page_cache_rss += bytes; + } + else + total_lazy_free += bytes; + } + skipToNextLineOrEOF(in); + } + stats.unreclaimable_rss = total_rss - std::min(total_lazy_free, total_rss); + + return stats; + } +#endif + + stats.page_cache_rss = bytes_per_page * pages_per_chunk * total_chunks; + return stats; +} + +PinnedPageChunk PageCache::getOrSet(PageCacheKey key, bool detached_if_missing, bool inject_eviction) +{ + PageChunk * chunk; + /// Make sure we increment exactly one of the counters about the fate of a chunk lookup. 
+ bool incremented_profile_events = false; + + { + std::unique_lock lock(global_mutex); + + auto * it = chunk_by_key.find(key); + if (it == chunk_by_key.end()) + { + chunk = getFreeChunk(lock); + chassert(!chunk->key.has_value()); + + if (!detached_if_missing) + { + chunk->key = key; + chunk_by_key.insert({key, chunk}); + } + + ProfileEvents::increment(ProfileEvents::PageCacheChunkMisses); + incremented_profile_events = true; + } + else + { + chunk = it->getMapped(); + size_t prev_pin_count = chunk->pin_count.fetch_add(1); + + if (prev_pin_count == 0) + { + /// Not eligible for LRU eviction while pinned. + chassert(chunk->is_linked()); + lru.erase(lru.iterator_to(*chunk)); + + if (detached_if_missing) + { + /// Peek the first page to see if it's evicted. + /// (Why not use the full probing procedure instead, restoreChunkFromLimbo()? + /// Right here we can't do it because of how the two mutexes are organized. + /// And we want to do the check+detach before unlocking global_mutex, because + /// otherwise we may detach a chunk pinned by someone else, which may be unexpected + /// for that someone else. Or maybe the latter is fine, dropCache() already does it.) + if (chunk->pages_populated.get(0) && reinterpret_cast*>(chunk->data)->load(std::memory_order_relaxed) == 0) + evictChunk(chunk, lock); + } + + if (inject_eviction && chunk->key.has_value() && rng() % 10 == 0) + { + /// Simulate eviction of the chunk or some of its pages. 
+ if (rng() % 2 == 0) + evictChunk(chunk, lock); + else + for (size_t i = 0; i < 20; ++i) + chunk->pages_populated.unset(rng() % (chunk->size / chunk->page_size)); + } + } + else + { + ProfileEvents::increment(ProfileEvents::PageCacheChunkShared); + incremented_profile_events = true; + } + } + } + + { + std::unique_lock chunk_lock(chunk->chunk_mutex); + + if (chunk->pages_state == PageChunkState::Limbo) + { + auto [pages_restored, pages_evicted] = restoreChunkFromLimbo(chunk, chunk_lock); + chunk->pages_state = PageChunkState::Stable; + + if (!incremented_profile_events) + { + if (pages_evicted == 0) + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataHits); + else if (pages_evicted < pages_restored) + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataPartialHits); + else + ProfileEvents::increment(ProfileEvents::PageCacheChunkDataMisses); + } + } + } + + return PinnedPageChunk(this, chunk); +} + +void PageCache::removeRef(PageChunk * chunk) noexcept +{ + /// Fast path if this is not the last reference. + size_t prev_pin_count = chunk->pin_count.load(); + if (prev_pin_count > 1 && chunk->pin_count.compare_exchange_strong(prev_pin_count, prev_pin_count - 1)) + return; + + { + std::unique_lock lock(global_mutex); + + prev_pin_count = chunk->pin_count.fetch_sub(1); + if (prev_pin_count > 1) + return; + + chassert(!chunk->is_linked()); + if (chunk->key.has_value()) + lru.push_back(*chunk); + else + /// Unpinning detached chunk. We'd rather reuse it soon, so put it at the front. + lru.push_front(*chunk); + } + + { + std::unique_lock chunk_lock(chunk->chunk_mutex); + + /// Need to be extra careful here because we unlocked global_mutex above, so other + /// getOrSet()/removeRef() calls could have happened during this brief period. 
+ if (use_madv_free && chunk->pages_state == PageChunkState::Stable && chunk->pin_count.load() == 0) + { + sendChunkToLimbo(chunk, chunk_lock); + chunk->pages_state = PageChunkState::Limbo; + } + } +} + +static void logUnexpectedSyscallError(std::string name) +{ + std::string message = fmt::format("{} failed: {}", name, errnoToString()); + LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); +#if defined(ABORT_ON_LOGICAL_ERROR) + volatile bool true_ = true; + if (true_) // suppress warning about missing [[noreturn]] + abortOnFailedAssertion(message); +#endif +} + +void PageCache::sendChunkToLimbo(PageChunk * chunk [[maybe_unused]], std::unique_lock & /* chunk_mutex */) const noexcept +{ +#ifdef MADV_FREE // if we're not on a very old version of Linux + chassert(chunk->size == bytes_per_page * pages_per_chunk); + size_t populated_pages = 0; + size_t populated_big_pages = 0; + for (size_t big_page_idx = 0; big_page_idx < pages_per_chunk / pages_per_big_page; ++big_page_idx) + { + bool big_page_populated = false; + for (size_t sub_idx = 0; sub_idx < pages_per_big_page; ++sub_idx) + { + size_t idx = big_page_idx * pages_per_big_page + sub_idx; + if (!chunk->pages_populated.get(idx)) + continue; + big_page_populated = true; + populated_pages += 1; + + auto & byte = reinterpret_cast &>(chunk->data[idx * bytes_per_page]); + chunk->first_bit_of_each_page.set(idx, (byte.load(std::memory_order_relaxed) & 1) != 0); + byte.fetch_or(1, std::memory_order_relaxed); + } + if (big_page_populated) + populated_big_pages += 1; + } + int r = madvise(chunk->data, chunk->size, MADV_FREE); + if (r != 0) + logUnexpectedSyscallError("madvise(MADV_FREE)"); + + ProfileEvents::increment(ProfileEvents::PageCacheBytesUnpinnedRoundedToPages, bytes_per_page * populated_pages); + ProfileEvents::increment(ProfileEvents::PageCacheBytesUnpinnedRoundedToHugePages, bytes_per_page * pages_per_big_page * populated_big_pages); +#endif +} + +std::pair PageCache::restoreChunkFromLimbo(PageChunk * 
chunk, std::unique_lock & /* chunk_mutex */) const noexcept +{ + static_assert(sizeof(std::atomic) == 1, "char is not atomic?"); + // Make sure our strategic memory reads/writes are not reordered or optimized out. + auto * data = reinterpret_cast *>(chunk->data); + size_t pages_restored = 0; + size_t pages_evicted = 0; + for (size_t idx = 0; idx < chunk->size / bytes_per_page; ++idx) + { + if (!chunk->pages_populated.get(idx)) + continue; + + /// After MADV_FREE, it's guaranteed that: + /// * writing to the page makes it non-freeable again (reading doesn't), + /// * after the write, the page contents are either fully intact or fully zero-filled, + /// * even before the write, reads return either intact data (if the page wasn't freed) or zeroes (if it was, and the read page-faulted). + /// (And when doing the write there's no way to tell whether it page-faulted or not, AFAICT; that would make our life much easier!) + /// + /// With that in mind, we do the following dance to bring the page back from the MADV_FREE limbo: + /// 0. [in advance] Before doing MADV_FREE, make sure the page's first byte is not zero. + /// We do it by setting the lowest bit of the first byte to 1, after saving the original value of that bit into a bitset. + /// 1. Read the second byte. + /// 2. Write the second byte back. This makes the page non-freeable. + /// 3. Read the first byte. + /// 3a. If it's zero, the page was freed. + /// Set the second byte to 0, to keep the buffer zero-filled if the page was freed + /// between steps 1 and 2. + /// 3b. If it's nonzero, the page is intact. + /// Restore the lowest bit of the first byte to the saved original value from the bitset. 
+ + char second_byte = data[idx * bytes_per_page + 1].load(std::memory_order_relaxed); + data[idx * bytes_per_page + 1].store(second_byte, std::memory_order_relaxed); + + char first_byte = data[idx * bytes_per_page].load(std::memory_order_relaxed); + if (first_byte == 0) + { + pages_evicted += 1; + data[idx * bytes_per_page + 1].store(0, std::memory_order_relaxed); + chunk->pages_populated.unset(idx); + } + else + { + pages_restored += 1; + chassert(first_byte & 1); + if (!chunk->first_bit_of_each_page.get(idx)) + data[idx * bytes_per_page].fetch_and(~1, std::memory_order_relaxed); + } + } + return {pages_restored, pages_evicted}; +} + +PageChunk * PageCache::getFreeChunk(std::unique_lock & lock /* global_mutex */) +{ + if (lru.empty() || (mmaps.size() < max_mmaps && lru.front().key.has_value())) + addMmap(lock); + if (lru.empty()) + throw Exception(ErrorCodes::MEMORY_LIMIT_EXCEEDED, "All chunks in the entire page cache ({:.3} GiB) are pinned.", + bytes_per_page * pages_per_chunk * total_chunks * 1. / (1l << 30)); + + PageChunk * chunk = &lru.front(); + lru.erase(lru.iterator_to(*chunk)); + + size_t prev_pin_count = chunk->pin_count.fetch_add(1); + chassert(prev_pin_count == 0); + + evictChunk(chunk, lock); + + return chunk; +} + +void PageCache::evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */) +{ + if (chunk->key.has_value()) + { + size_t erased = chunk_by_key.erase(chunk->key.value()); + chassert(erased); + chunk->key.reset(); + } + + chunk->state.reset(); + + /// This is tricky. We're not holding the chunk_mutex, so another thread might be running + /// sendChunkToLimbo() or even restoreChunkFromLimbo() on this chunk right now. + /// + /// Nevertheless, it's correct and sufficient to clear pages_populated here because sendChunkToLimbo() + /// and restoreChunkFromLimbo() only touch pages_populated (only unsetting the bits), + /// first_bit_of_each_page, and the data; and we don't care about first_bit_of_each_page and the data. 
+ /// + /// This is precarious, but I don't have better ideas. Note that this clearing (or something else) + /// must be done before unlocking the global_mutex because otherwise another call to getOrSet() might + /// return this chunk before we clear it. + chunk->pages_populated.unsetAll(); +} + +void PageCache::addMmap(std::unique_lock & /* global_mutex */) +{ + /// ASLR by hand. + void * address_hint = reinterpret_cast(std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(rng)); + + mmaps.emplace_back(bytes_per_page, pages_per_chunk, pages_per_big_page, chunks_per_mmap_target, address_hint, use_huge_pages); + + size_t num_chunks = mmaps.back().num_chunks; + total_chunks += num_chunks; + for (size_t i = 0; i < num_chunks; ++i) + /// Link in reverse order, so they get assigned in increasing order. Not important, just seems nice. + lru.push_front(mmaps.back().chunks[num_chunks - 1 - i]); +} + +void PageCache::dropCache() +{ + std::unique_lock lock(global_mutex); + + /// Detach and free unpinned chunks. + bool logged_error = false; + for (PageChunk & chunk : lru) + { + evictChunk(&chunk, lock); + + if (use_madv_free) + { + /// This might happen in parallel with sendChunkToLimbo() or restoreChunkFromLimbo(), but it's ok. + int r = madvise(chunk.data, chunk.size, MADV_DONTNEED); + if (r != 0 && !logged_error) + { + logUnexpectedSyscallError("madvise(MADV_DONTNEED)"); + logged_error = true; + } + } + } + + /// Detach pinned chunks. 
+ for (auto [key, chunk] : chunk_by_key) + { + chassert(chunk->key == key); + chassert(chunk->pin_count > 0); // otherwise it would have been evicted above + chunk->key.reset(); + } + chunk_by_key.clear(); +} + +PageCache::Mmap::Mmap(size_t bytes_per_page_, size_t pages_per_chunk_, size_t pages_per_big_page_, size_t num_chunks_, void * address_hint, bool use_huge_pages_) +{ + num_chunks = num_chunks_; + size = bytes_per_page_ * pages_per_chunk_ * num_chunks; + + size_t alignment = bytes_per_page_ * pages_per_big_page_; + address_hint = reinterpret_cast(reinterpret_cast(address_hint) / alignment * alignment); + + auto temp_chunks = std::make_unique(num_chunks); + + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#ifdef OS_LINUX + flags |= MAP_NORESERVE; +#endif + ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (MAP_FAILED == ptr) + throw ErrnoException(ErrorCodes::CANNOT_ALLOCATE_MEMORY, fmt::format("Cannot mmap {}.", ReadableSize(size))); + if (reinterpret_cast(ptr) % bytes_per_page_ != 0) + { + munmap(ptr, size); + throw Exception(ErrorCodes::SYSTEM_ERROR, "mmap returned unaligned address: {}", ptr); + } + + void * chunks_start = ptr; + +#ifdef OS_LINUX + if (madvise(ptr, size, MADV_DONTDUMP) != 0) + logUnexpectedSyscallError("madvise(MADV_DONTDUMP)"); + if (madvise(ptr, size, MADV_DONTFORK) != 0) + logUnexpectedSyscallError("madvise(MADV_DONTFORK)"); + + if (use_huge_pages_) + { + if (reinterpret_cast(ptr) % alignment != 0) + { + LOG_DEBUG(&Poco::Logger::get("PageCache"), "mmap() returned address not aligned on huge page boundary."); + chunks_start = reinterpret_cast((reinterpret_cast(ptr) / alignment + 1) * alignment); + chassert(reinterpret_cast(chunks_start) % alignment == 0); + num_chunks -= 1; + } + + if (madvise(ptr, size, MADV_HUGEPAGE) != 0) + LOG_WARNING(&Poco::Logger::get("PageCache"), + "madvise(MADV_HUGEPAGE) failed: {}. 
Userspace page cache will be relatively slow.", errnoToString()); + } +#else + (void)use_huge_pages_; +#endif + + chunks = std::move(temp_chunks); + for (size_t i = 0; i < num_chunks; ++i) + { + PageChunk * chunk = &chunks[i]; + chunk->data = reinterpret_cast(chunks_start) + bytes_per_page_ * pages_per_chunk_ * i; + chunk->size = bytes_per_page_ * pages_per_chunk_; + chunk->page_size = bytes_per_page_; + chunk->big_page_size = bytes_per_page_ * pages_per_big_page_; + chunk->pages_populated.init(pages_per_chunk_); + chunk->first_bit_of_each_page.init(pages_per_chunk_); + } +} + +PageCache::Mmap::Mmap(Mmap && m) noexcept : ptr(std::exchange(m.ptr, nullptr)), size(std::exchange(m.size, 0)), chunks(std::move(m.chunks)), num_chunks(std::exchange(m.num_chunks, 0)) {} + +PageCache::Mmap::~Mmap() noexcept +{ + if (ptr && 0 != munmap(ptr, size)) + logUnexpectedSyscallError("munmap"); +} + +void FileChunkState::reset() {} + +PageCacheKey FileChunkAddress::hash() const +{ + SipHash hash(offset); + hash.update(path.data(), path.size()); + if (!file_version.empty()) + { + hash.update("\0", 1); + hash.update(file_version.data(), file_version.size()); + } + return hash.get128(); +} + +std::string FileChunkAddress::toString() const +{ + return fmt::format("{}:{}{}{}", path, offset, file_version.empty() ? "" : ":", file_version); +} + +#pragma clang diagnostic pop + +} diff --git a/src/Common/PageCache.h b/src/Common/PageCache.h new file mode 100644 index 00000000000..7ff376baa6b --- /dev/null +++ b/src/Common/PageCache.h @@ -0,0 +1,299 @@ +#pragma once + +#include +#include +#include +#include +#include + +/// "Userspace page cache" +/// A cache for contents of remote files. +/// Uses MADV_FREE to allow Linux to evict pages from our cache under memory pressure. +/// Typically takes up almost all of the available memory, similar to the actual page cache. 
+/// +/// Intended for caching data retrieved from distributed cache, but can be used for other things too, +/// just replace FileChunkState with a discriminated union, or something, if needed. +/// +/// There are two fixed-size units of caching here: +/// * OS pages, typically 4 KiB each. +/// * Page chunks, 2 MiB each (configurable with page_cache_block_size setting). +/// +/// Each file is logically split into aligned 2 MiB blocks, which are mapped to page chunks inside the cache. +/// They are cached independently from each other. +/// +/// Each page chunk has a contiguous 2 MiB buffer that can be pinned and directly used e.g. by ReadBuffers. +/// While pinned (by at least one PinnedPageChunk), the pages are not reclaimable by the OS. +/// +/// Inside each page chunk, any subset of pages may be populated. Unpopulated pages may or may not be +/// mapped to any physical RAM. We maintain a bitmask that keeps track of which pages are populated. +/// Pages become unpopulated if they're reclaimed by the OS (when the page chunk is not pinned), +/// or if we just never populate them in the first place (e.g. if a file is shorter than 2 MiB we +/// still create a 2 MiB page chunk, but use only a prefix of it). +/// +/// There are two separate eviction mechanisms at play: +/// * LRU eviction of page chunks in PageCache. +/// * OS reclaiming pages on memory pressure. We have no control over the eviction policy. +/// It probably picks the pages in the same order in which they were marked with MADV_FREE, so +/// effectively in the same LRU order as our policy in PageCache. +/// When using PageCache in oversubscribed fashion, using all available memory and relying on OS eviction, +/// the PageCache's eviction policy mostly doesn't matter. It just needs to be similar enough to the OS's +/// policy that we rarely evict chunks with unevicted pages. 
+/// +/// We mmap memory directly instead of using allocator because this enables: +/// * knowing how much RAM the cache is using, via /proc/self/smaps, +/// * MADV_HUGEPAGE (use transparent huge pages - this makes MADV_FREE 10x less slow), +/// * MAP_NORESERVE (don't reserve swap space - otherwise large mmaps usually fail), +/// * MADV_DONTDUMP (don't include in core dumps), +/// * page-aligned addresses without padding. +/// +/// madvise(MADV_FREE) call is slow: ~6 GiB/s (doesn't scale with more threads). Enabling transparent +/// huge pages (MADV_HUGEPAGE) makes it 10x less slow, so we do that. That makes the physical RAM allocation +/// work at 2 MiB granularity instead of 4 KiB, so the cache becomes less suitable for small files. +/// If this turns out to be a problem, we may consider allowing different mmaps to have different flags, +/// some having no huge pages. +/// Note that we do our bookkeeping at small-page granularity even if huge pages are enabled. +/// +/// It's unfortunate that Linux's MADV_FREE eviction doesn't use the two-list strategy like the real +/// page cache (IIUC, MADV_FREE puts the pages at the head of the inactive list, and they can never +/// get to the active list). +/// If this turns out to be a problem, we could make PageCache do chunk eviction based on observed +/// system memory usage, so that most eviction is done by us, and the MADV_FREE eviction kicks in +/// only as a last resort. Then we can make PageCache's eviction policy arbitrarily more sophisticated. + +namespace DB +{ + +/// Hash of FileChunkAddress. +using PageCacheKey = UInt128; + +/// Identifies a chunk of a file or object. +/// We assume that contents of such file/object don't change (without file_version changing), so +/// no cache invalidation is needed. +struct FileChunkAddress +{ + /// Path, usually prefixed with storage system name and anything else needed to make it unique. + /// E.g. 
"s3:/" + std::string path; + /// Optional string with ETag, or file modification time, or anything else. + std::string file_version; + size_t offset = 0; + + PageCacheKey hash() const; + + std::string toString() const; +}; + +struct AtomicBitSet +{ + size_t n = 0; + std::unique_ptr[]> v; + + AtomicBitSet(); + + void init(size_t n); + + bool get(size_t i) const; + bool any() const; + /// These return true if the bit was changed, false if it already had the target value. + /// (These methods are logically not const, but clang insists that I make them const, and + /// '#pragma clang diagnostic ignored' doesn't seem to work.) + bool set(size_t i) const; + bool set(size_t i, bool val) const; + bool unset(size_t i) const; + void unsetAll() const; +}; + +enum class PageChunkState +{ + /// Pages are not reclaimable by the OS, the buffer has correct contents. + Stable, + /// Pages are reclaimable by the OS, the buffer contents are altered (first bit of each page set to 1). + Limbo, +}; + +/// (This is a separate struct just in case we want to use this cache for other things in future. +/// Then this struct would be the customization point, while the rest of PageChunk can stay unchanged.) +struct FileChunkState +{ + std::mutex download_mutex; + + void reset(); +}; + +using PageChunkLRUListHook = boost::intrusive::list_base_hook<>; + +/// Cache entry. +struct PageChunk : public PageChunkLRUListHook +{ + char * data; + size_t size; // in bytes + /// Page size for use in pages_populated and first_bit_of_each_page. Same as PageCache::pageSize(). + size_t page_size; + + /// Actual eviction granularity. Just for information. If huge pages are used, huge page size, otherwise page_size. + size_t big_page_size; + + mutable FileChunkState state; + + AtomicBitSet pages_populated; + +private: + friend class PinnedPageChunk; + friend class PageCache; + + /// If nullopt, the chunk is "detached", i.e. not associated with any key. + /// Detached chunks may still be pinned. 
Chunk may get detached even while pinned, in particular when dropping cache. + /// Protected by global_mutex. + std::optional key; + + /// Refcount for usage of this chunk. When zero, the pages are reclaimable by the OS, and + /// the PageChunk itself is evictable (linked into PageCache::lru). + std::atomic pin_count {0}; + + /// Bit mask containing the first bit of data from each page. Needed for the weird probing procedure when un-MADV_FREE-ing the pages. + AtomicBitSet first_bit_of_each_page; + + /// Locked when changing pages_state, along with the corresponding expensive MADV_FREE/un-MADV_FREE operation. + mutable std::mutex chunk_mutex; + + /// Normally pin_count == 0 <=> state == PageChunkState::Limbo, + /// pin_count > 0 <=> state == PageChunkState::Stable. + /// This separate field is needed because of synchronization: pin_count is changed with global_mutex locked, + /// this field is changed with chunk_mutex locked, and we never have to lock both mutexes at once. + PageChunkState pages_state = PageChunkState::Stable; +}; + +class PageCache; + +/// Handle for a cache entry. Neither the entry nor its pages can get evicted while there's at least one PinnedPageChunk pointing to it. +class PinnedPageChunk +{ +public: + const PageChunk * getChunk() const; + + /// Sets the bit in pages_populated. Returns true if it actually changed (i.e. was previously 0). + bool markPagePopulated(size_t page_idx); + + /// Calls markPagePopulated() for pages 0..ceil(bytes/page_size). 
+ void markPrefixPopulated(size_t bytes); + + bool isPrefixPopulated(size_t bytes) const; + + PinnedPageChunk() = default; + ~PinnedPageChunk() noexcept; + + PinnedPageChunk(PinnedPageChunk &&) noexcept; + PinnedPageChunk & operator=(PinnedPageChunk &&) noexcept; + +private: + friend class PageCache; + + PageCache * cache = nullptr; + PageChunk * chunk = nullptr; + + PinnedPageChunk(PageCache * cache_, PageChunk * chunk_) noexcept; +}; + +class PageCache +{ +public: + PageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages); + ~PageCache(); + + /// Get or insert a chunk for the given key. + /// + /// If detached_if_missing = true, and the key is not present in the cache, the returned chunk + /// won't be associated with the key and will be evicted as soon as it's unpinned. + /// It's like "get if exists, otherwise return null", but instead of null we return a usable + /// temporary buffer, for convenience. Pinning and page eviction make the story more complicated: + /// * If the chunk for this key is pinned, we return it even if it's not fully populated + /// (because PageCache doesn't know what "fully populated" means). + /// * If the chunk exists, but some of its pages were evicted, we detach it. (Currently we only + /// check the first page here.) + PinnedPageChunk getOrSet(PageCacheKey key, bool detached_if_missing, bool inject_eviction); + + /// OS page size, e.g. 4 KiB on x86, 4 KiB or 64 KiB on aarch64. + /// + /// If transparent huge pages are enabled, this is still the regular page size, and all our bookkeeping + /// is still based on regular page size (e.g. pages_populated), because (a) it's cheap anyway, + /// and (b) I'm not sure if Linux guarantees that MADV_FREE reclamation always happens at huge page + /// granularity, and wouldn't want to rely on this even if it does. 
+ size_t pageSize() const; + size_t chunkSize() const; + size_t maxChunks() const; + + struct MemoryStats + { + /// How many bytes of actual RAM are used for the cache pages. Doesn't include metadata + /// and overhead (e.g. PageChunk structs). + size_t page_cache_rss = 0; + /// Resident set size for the whole process, excluding any MADV_FREE pages (PageCache's or not). + /// This can be used as a more useful memory usage number for clickhouse server, instead of RSS. + /// Populated only if MADV_FREE is used, otherwise zero. + std::optional unreclaimable_rss; + }; + + /// Reads /proc/self/smaps, so not very fast. + MemoryStats getResidentSetSize() const; + + /// Total length of memory ranges currently pinned by PinnedPageChunk-s, including unpopulated pages. + size_t getPinnedSize() const; + + /// Clears the key -> chunk mapping. Frees memory (MADV_DONTNEED) of all chunks that are not pinned. + /// Doesn't unmap any virtual memory. Detaches but doesn't free the pinned chunks. + /// Locks the global mutex for the duration of the operation, which may block queries for hundreds of milliseconds. + void dropCache(); + +private: + friend class PinnedPageChunk; + + struct Mmap + { + void * ptr = nullptr; + size_t size = 0; + + std::unique_ptr chunks; + size_t num_chunks = 0; // might be smaller than chunks_per_mmap_target because of alignment + + Mmap(Mmap &&) noexcept; + Mmap(size_t bytes_per_page, size_t pages_per_chunk, size_t pages_per_big_page, size_t num_chunks, void * address_hint, bool use_huge_pages_); + ~Mmap() noexcept; + }; + + size_t bytes_per_page; + size_t pages_per_chunk; + size_t chunks_per_mmap_target; + size_t max_mmaps; + size_t pages_per_big_page = 1; // if huge pages are used, huge_page_size/page_size, otherwise 1 + bool use_madv_free = true; + bool use_huge_pages = true; + + mutable std::mutex global_mutex; + + pcg64 rng; + + std::vector mmaps; + size_t total_chunks = 0; + + /// All non-pinned chunks, including ones not assigned to any file. 
Least recently used is begin(). + boost::intrusive::list, boost::intrusive::constant_time_size> lru; + + HashMap chunk_by_key; + + /// Get a usable chunk, doing eviction or allocation if needed. + /// Caller is responsible for clearing pages_populated. + PageChunk * getFreeChunk(std::unique_lock & /* global_mutex */); + void addMmap(std::unique_lock & /* global_mutex */); + void evictChunk(PageChunk * chunk, std::unique_lock & /* global_mutex */); + + void removeRef(PageChunk * chunk) noexcept; + + /// These may run in parallel with getFreeChunk(), so be very careful about which fields of the PageChunk we touch here. + void sendChunkToLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; + /// Returns {pages_restored, pages_evicted}. + std::pair restoreChunkFromLimbo(PageChunk * chunk, std::unique_lock & /* chunk_mutex */) const noexcept; +}; + +using PageCachePtr = std::shared_ptr; + +} diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d8ca1ab9e93..3a8659b8b27 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -63,6 +63,15 @@ M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \ M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ + /* Each page cache chunk access increments exactly one of the following 5 PageCacheChunk* counters. */ \ + /* Something like hit rate: (PageCacheChunkShared + PageCacheChunkDataHits) / [sum of all 5]. 
*/ \ + M(PageCacheChunkMisses, "Number of times a chunk has not been found in the userspace page cache.") \ + M(PageCacheChunkShared, "Number of times a chunk has been found in the userspace page cache, already in use by another thread.") \ + M(PageCacheChunkDataHits, "Number of times a chunk has been found in the userspace page cache, not in use, with all pages intact.") \ + M(PageCacheChunkDataPartialHits, "Number of times a chunk has been found in the userspace page cache, not in use, but some of its pages were evicted by the OS.") \ + M(PageCacheChunkDataMisses, "Number of times a chunk has been found in the userspace page cache, not in use, but all its pages were evicted by the OS.") \ + M(PageCacheBytesUnpinnedRoundedToPages, "Total size of populated pages in chunks that became evictable in PageCache. Rounded up to whole pages.") \ + M(PageCacheBytesUnpinnedRoundedToHugePages, "See PageCacheBytesUnpinnedRoundedToPages, but rounded to huge pages. Use the ratio between the two as a measure of memory waste from using huge pages.") \ M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \ diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index e5654162ecb..a16d889a67a 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -11,9 +11,7 @@ #ifdef OS_DARWIN // ucontext is not available without _XOPEN_SOURCE -# ifdef __clang__ -# pragma clang diagnostic ignored "-Wreserved-id-macro" -# endif +# pragma clang 
diagnostic ignored "-Wreserved-id-macro" # define _XOPEN_SOURCE 700 #endif #include diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 229150ecccb..f9523f667b2 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -102,8 +102,6 @@ String toString(TargetArch arch); /// NOLINTNEXTLINE #define USE_MULTITARGET_CODE 1 -#if defined(__clang__) - #define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) @@ -134,45 +132,7 @@ String toString(TargetArch arch); * To prevent this warning we define this function inside every macros with pragmas. */ # define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); -#else -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native"))) -#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) -#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) -#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) -#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native"))) -#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native"))) -#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE 
__attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) -#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE - -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")") -# define BEGIN_AVX512VBMI_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")") -# define BEGIN_AVX512BW_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native\")") -# define BEGIN_AVX512F_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")") -# define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native\")") -# define BEGIN_AVX_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native\")") -# define BEGIN_SSE42_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,tune=native\")") -# define END_TARGET_SPECIFIC_CODE \ - _Pragma("GCC pop_options") - -/* GCC doesn't show such warning, we don't need to define anything. - */ -# define DUMMY_FUNCTION_DEFINITION -#endif #define DECLARE_SSE42_SPECIFIC_CODE(...) 
\ BEGIN_SSE42_SPECIFIC_CODE \ diff --git a/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp b/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp index 0bf13ef91ed..e6c09905ab8 100644 --- a/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp +++ b/src/Common/benchmarks/integer_hash_tables_and_hashes.cpp @@ -26,10 +26,8 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wused-but-marked-unused" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wused-but-marked-unused" #include using Key = UInt64; @@ -385,6 +383,4 @@ OK_GOOGLE(TestRndInput, HashMap, TwoRoundsTwoVarsHash, elements_to_insert) OK_GOOGLE(TestRndInput, HashMap, WyHash, elements_to_insert) OK_GOOGLE(TestRndInput, HashMap, XXH3Hash, elements_to_insert) -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Common/intExp.h b/src/Common/intExp.h index 69b0f09975a..25ae2a8a4b6 100644 --- a/src/Common/intExp.h +++ b/src/Common/intExp.h @@ -4,15 +4,7 @@ #include #include - -// Also defined in Core/Defines.h -#if !defined(NO_SANITIZE_UNDEFINED) -#if defined(__clang__) - #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) -#else - #define NO_SANITIZE_UNDEFINED -#endif -#endif +#include /// On overflow, the function returns unspecified value. 
diff --git a/src/Common/re2.h b/src/Common/re2.h index c81b7157e91..ef1d2ba2a16 100644 --- a/src/Common/re2.h +++ b/src/Common/re2.h @@ -1,11 +1,6 @@ #pragma once - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 3f9b75e264d..d1d10dafb63 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -16,9 +16,7 @@ /// For the expansion of gtest macros. -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wused-but-marked-unused" -#endif +#pragma clang diagnostic ignored "-Wused-but-marked-unused" // All timezones present at build time and embedded into ClickHouse binary. extern const char * auto_time_zones[]; diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index b1eb7fb50c3..6dc10677a3f 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -1,14 +1,9 @@ #include #include #include -#include #include -#include -#include #include #include -#include -#include namespace DB @@ -88,14 +83,34 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour const auto codec = CompressionCodecFactory::instance().get(compression_method); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); - compressed_buf.resize(compressed_buf.size() + additional_size_at_the_end_of_buffer); + if (compressed_buf.size() >= 1_GiB) + throw Exception(decompression_error_code, "Too large compressed size: {}", compressed_buf.size()); + + { + UInt32 bytes_to_resize; + if 
(common::addOverflow(static_cast(compressed_buf.size()), additional_size_at_the_end_of_buffer, bytes_to_resize)) + throw Exception(decompression_error_code, "Too large compressed size: {}", compressed_buf.size()); + + compressed_buf.resize(bytes_to_resize); + } + UInt32 uncompressed_size = readDecompressedBlockSize(compressed_buf.data()); + if (uncompressed_size >= 1_GiB) + throw Exception(decompression_error_code, "Too large uncompressed size: {}", uncompressed_size); + if (idx == 0 && uncompressed_size != decompressed_size) throw Exception(decompression_error_code, "Wrong final decompressed size in codec Multiple, got {}, expected {}", uncompressed_size, decompressed_size); - uncompressed_buf.resize(uncompressed_size + additional_size_at_the_end_of_buffer); + { + UInt32 bytes_to_resize; + if (common::addOverflow(uncompressed_size, additional_size_at_the_end_of_buffer, bytes_to_resize)) + throw Exception(decompression_error_code, "Too large uncompressed size: {}", uncompressed_size); + + uncompressed_buf.resize(bytes_to_resize); + } + codec->decompress(compressed_buf.data(), source_size, uncompressed_buf.data()); uncompressed_buf.swap(compressed_buf); source_size = uncompressed_size; diff --git a/src/Compression/CompressionCodecNone.cpp b/src/Compression/CompressionCodecNone.cpp index 065ac4a2625..53d62e51920 100644 --- a/src/Compression/CompressionCodecNone.cpp +++ b/src/Compression/CompressionCodecNone.cpp @@ -27,8 +27,12 @@ UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_s return source_size; } -void CompressionCodecNone::doDecompressData(const char * source, UInt32 /*source_size*/, char * dest, UInt32 uncompressed_size) const +void CompressionCodecNone::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const { + if (source_size != uncompressed_size) + throw Exception(decompression_error_code, "Wrong data for compression codec NONE: 
source_size ({}) != uncompressed_size ({})", + source_size, uncompressed_size); + memcpy(dest, source, uncompressed_size); } diff --git a/src/Compression/CompressionCodecNone.h b/src/Compression/CompressionCodecNone.h index 1565720947d..5d6f135b351 100644 --- a/src/Compression/CompressionCodecNone.h +++ b/src/Compression/CompressionCodecNone.h @@ -18,9 +18,7 @@ public: void updateHash(SipHash & hash) const override; protected: - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; bool isCompression() const override { return false; } diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index c7f6571cb46..b548feed848 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -49,9 +49,7 @@ inline void copy8(UInt8 * dst, const UInt8 * src) inline void wildCopy8(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy8(dst, src); @@ -234,9 +232,7 @@ inline void copy16(UInt8 * dst, const UInt8 * src) inline void wildCopy16(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy16(dst, src); @@ -371,9 +367,7 @@ inline void copy32(UInt8 * dst, const UInt8 * src) inline void wildCopy32(UInt8 * dst, const UInt8 * src, const UInt8 * dst_end) { /// Unrolling with clang is doing >10% performance degrade. -#if defined(__clang__) #pragma nounroll -#endif do { copy32(dst, src); @@ -487,9 +481,7 @@ bool NO_INLINE decompressImpl(const char * const source, char * const dest, size UInt8 * const output_end = op + dest_size; /// Unrolling with clang is doing >10% performance degrade. 
-#if defined(__clang__) #pragma nounroll -#endif while (true) { size_t length; diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index b06e321aeec..a36a074ce89 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,3 +1,6 @@ +#include +#include + #include #include @@ -374,11 +377,16 @@ void KeeperContext::updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfig bool KeeperContext::setShutdownCalled() { - std::unique_lock lock(local_logs_preprocessed_cv_mutex); + std::unique_lock local_logs_preprocessed_lock(local_logs_preprocessed_cv_mutex); + std::unique_lock last_committed_log_idx_lock(last_committed_log_idx_cv_mutex); + if (!shutdown_called.exchange(true)) { - lock.unlock(); + local_logs_preprocessed_lock.unlock(); + last_committed_log_idx_lock.unlock(); + local_logs_preprocessed_cv.notify_all(); + last_committed_log_idx_cv.notify_all(); return true; } @@ -410,4 +418,36 @@ const CoordinationSettingsPtr & KeeperContext::getCoordinationSettings() const return coordination_settings; } +uint64_t KeeperContext::lastCommittedIndex() const +{ + return last_committed_log_idx.load(std::memory_order_relaxed); +} + +void KeeperContext::setLastCommitIndex(uint64_t commit_index) +{ + bool should_notify; + { + std::lock_guard lock(last_committed_log_idx_cv_mutex); + last_committed_log_idx.store(commit_index, std::memory_order_relaxed); + + should_notify = wait_commit_upto_idx.has_value() && commit_index >= wait_commit_upto_idx; + } + + if (should_notify) + last_committed_log_idx_cv.notify_all(); +} + +bool KeeperContext::waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms) +{ + std::unique_lock lock(last_committed_log_idx_cv_mutex); + wait_commit_upto_idx = log_idx; + bool success = last_committed_log_idx_cv.wait_for( + lock, + std::chrono::milliseconds(wait_timeout_ms), + [&] { return shutdown_called || lastCommittedIndex() >= wait_commit_upto_idx; }); + + wait_commit_upto_idx.reset(); + 
return success; +} + } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index a7169e64387..e283e65dffa 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -76,21 +76,10 @@ public: void waitLocalLogsPreprocessedOrShutdown(); - uint64_t lastCommittedIndex() const - { - return last_committed_log_idx.load(std::memory_order_relaxed); - } - - void setLastCommitIndex(uint64_t commit_index) - { - last_committed_log_idx.store(commit_index, std::memory_order_relaxed); - last_committed_log_idx.notify_all(); - } - - void waitLastCommittedIndexUpdated(uint64_t current_last_committed_idx) - { - last_committed_log_idx.wait(current_last_committed_idx, std::memory_order_relaxed); - } + uint64_t lastCommittedIndex() const; + void setLastCommitIndex(uint64_t commit_index); + /// returns true if the log is committed, false if timeout happened + bool waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms); const CoordinationSettingsPtr & getCoordinationSettings() const; @@ -142,6 +131,11 @@ private: std::atomic last_committed_log_idx = 0; + /// will be set by dispatcher when waiting for certain commits + std::optional wait_commit_upto_idx = 0; + std::mutex last_committed_log_idx_cv_mutex; + std::condition_variable last_committed_log_idx_cv; + CoordinationSettingsPtr coordination_settings; }; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 65f41f3eb0d..cd642087130 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -5,6 +5,7 @@ #include #include +#include "Common/ZooKeeper/IKeeper.h" #include #include #include @@ -213,10 +214,13 @@ void KeeperDispatcher::requestThread() if (shutdown_called) break; + bool execute_requests_after_write = has_read_request || has_reconfig_request; + nuraft::ptr result_buf = nullptr; /// Forcefully process all previous pending requests if (prev_result) - result_buf = 
forceWaitAndProcessResult(prev_result, prev_batch); + result_buf + = forceWaitAndProcessResult(prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); /// Process collected write requests batch if (!current_batch.empty()) @@ -237,10 +241,11 @@ void KeeperDispatcher::requestThread() } /// If we will execute read or reconfig next, we have to process result now - if (has_read_request || has_reconfig_request) + if (execute_requests_after_write) { if (prev_result) - result_buf = forceWaitAndProcessResult(prev_result, current_batch); + result_buf = forceWaitAndProcessResult( + prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); /// In case of older version or disabled async replication, result buf will be set to value of `commit` function /// which always returns nullptr @@ -252,19 +257,15 @@ void KeeperDispatcher::requestThread() nuraft::buffer_serializer bs(result_buf); auto log_idx = bs.get_u64(); - /// we will wake up this thread on each commit so we need to run it in loop until the last request of batch is committed - while (true) - { - if (shutdown_called) - return; + /// if timeout happened set error responses for the requests + if (!keeper_context->waitCommittedUpto(log_idx, coordination_settings->operation_timeout_ms.totalMilliseconds())) + addErrorResponses(prev_batch, Coordination::Error::ZOPERATIONTIMEOUT); - auto current_last_committed_idx = keeper_context->lastCommittedIndex(); - if (current_last_committed_idx >= log_idx) - break; - - keeper_context->waitLastCommittedIndexUpdated(current_last_committed_idx); - } + if (shutdown_called) + return; } + + prev_batch.clear(); } if (has_reconfig_request) @@ -503,10 +504,6 @@ void KeeperDispatcher::shutdown() LOG_DEBUG(log, "Shutting down storage dispatcher"); - /// some threads can be waiting for certain commits, so we set value - /// of the last commit index to something that will always unblock - 
keeper_context->setLastCommitIndex(std::numeric_limits::max()); - if (session_cleaner_thread.joinable()) session_cleaner_thread.join(); @@ -720,7 +717,8 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession } } -nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) +nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult( + RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) { if (!result->has_result()) result->get(); @@ -734,7 +732,10 @@ nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult(RaftAppe auto result_buf = result->get(); result = nullptr; - requests_for_sessions.clear(); + + if (!result_buf || clear_requests_on_success) + requests_for_sessions.clear(); + return result_buf; } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index e8ee486be88..231ef7e94e9 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -100,7 +100,8 @@ private: /// Forcefully wait for result and sets errors if something went wrong. 
/// Clears both arguments - nuraft::ptr forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); + nuraft::ptr forceWaitAndProcessResult( + RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); public: std::mutex read_request_queue_mutex; diff --git a/src/Core/Block.h b/src/Core/Block.h index 1a4f8c2e446..c8bebb4552a 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -177,7 +177,6 @@ using BlockPtr = std::shared_ptr; using Blocks = std::vector; using BlocksList = std::list; using BlocksPtr = std::shared_ptr; -using BlocksPtrs = std::shared_ptr>; /// Extends block with extra data in derived classes struct ExtraBlock diff --git a/src/Core/Defines.h b/src/Core/Defines.h index bf9fb1db6bc..cc6f49aa361 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -70,6 +70,15 @@ static constexpr auto DBMS_DEFAULT_MAX_QUERY_SIZE = 262144; /// Max depth of hierarchical dictionary static constexpr auto DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH = 1000; +#ifdef OS_LINUX +#define DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE true +#else +/// On Mac OS, MADV_FREE is not lazy, so page_cache_use_madv_free should be disabled. +/// On FreeBSD, it may work but we haven't tested it. +#define DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE false +#endif + + /// Default maximum (total and entry) sizes and policies of various caches static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index c82255ec59c..fc478ae4f41 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -65,7 +65,7 @@ namespace DB M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. 
Zero means unlimited.", 0) \ \ - M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ + M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \ M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \ @@ -78,6 +78,11 @@ namespace DB M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \ M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \ M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ + M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ + M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ + M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. 
Use 0 to disable userspace page cache.", 0) \ + M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ + M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..7d1112af3a7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -777,6 +777,10 @@ class IColumn; M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \ \ + M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \ + M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \ + M(Bool, page_cache_inject_eviction, false, "Userspace page cache will sometimes invalidate some pages at random. Intended for testing.", 0) \ + \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. 
Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..a7f96679bbe 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -114,6 +114,9 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index d67d5eb24e0..dec77119eed 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -474,16 +474,18 @@ DataTypePtr getLeastSupertype(const DataTypes & types) type_ids.insert(type->getTypeId()); /// For String and FixedString, or for different FixedStrings, the common type is String. - /// No other types are compatible with Strings. TODO Enums? + /// If there are Enums and any type of Strings, the common type is String. + /// No other types are compatible with Strings. 
{ size_t have_string = type_ids.count(TypeIndex::String); size_t have_fixed_string = type_ids.count(TypeIndex::FixedString); + size_t have_enums = type_ids.count(TypeIndex::Enum8) + type_ids.count(TypeIndex::Enum16); if (have_string || have_fixed_string) { - bool all_strings = type_ids.size() == (have_string + have_fixed_string); - if (!all_strings) - return throwOrReturn(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE); + bool all_compatible_with_string = type_ids.size() == (have_string + have_fixed_string + have_enums); + if (!all_compatible_with_string) + return throwOrReturn(types, "because some of them are String/FixedString/Enum and some of them are not", ErrorCodes::NO_COMMON_TYPE); return std::make_shared(); } diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 2373640704b..1a9cd2c994c 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -129,6 +129,7 @@ void AsynchronousBoundedReadBuffer::setReadUntilPosition(size_t position) /// new read until position is after the current position in the working buffer file_offset_of_buffer_end = position; working_buffer.resize(working_buffer.size() - (file_offset_of_buffer_end - position)); + pos = std::min(pos, working_buffer.end()); } else { @@ -235,9 +236,6 @@ bool AsynchronousBoundedReadBuffer::nextImpl() file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); - /// In case of multiple files for the same file in clickhouse (i.e. 
log family) - /// file_offset_of_buffer_end will not match getImplementationBufferOffset() - /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] chassert(file_offset_of_buffer_end <= impl->getFileSize()); if (read_until_position && (file_offset_of_buffer_end > *read_until_position)) @@ -264,7 +262,7 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) size_t new_pos; if (whence == SEEK_SET) { - assert(offset >= 0); + chassert(offset >= 0); new_pos = offset; } else if (whence == SEEK_CUR) @@ -290,8 +288,8 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) /// Position is still inside the buffer. /// Probably it is at the end of the buffer - then we will load data on the following 'next' call. pos = working_buffer.end() - file_offset_of_buffer_end + new_pos; - assert(pos >= working_buffer.begin()); - assert(pos <= working_buffer.end()); + chassert(pos >= working_buffer.begin()); + chassert(pos <= working_buffer.end()); return new_pos; } @@ -317,7 +315,7 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) break; } - assert(!prefetch_future.valid()); + chassert(!prefetch_future.valid()); /// First reset the buffer so the next read will fetch new data to the buffer. 
resetWorkingBuffer(); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 7ce3d58dcd8..47ee5858562 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1215,7 +1215,7 @@ size_t CachedOnDiskReadBufferFromFile::getRemainingSizeToRead() void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position) { - if (!allow_seeks_after_first_read) + if (initialized && !allow_seeks_after_first_read) throw Exception(ErrorCodes::LOGICAL_ERROR, "Method `setReadUntilPosition()` not allowed"); if (read_until_position == position) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 0b3ecca3587..417f7615dd7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -16,12 +17,16 @@ using namespace DB; namespace { -bool withCache(const ReadSettings & settings) +bool withFileCache(const ReadSettings & settings) { return settings.remote_fs_cache && settings.enable_filesystem_cache && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache || !settings.avoid_readthrough_cache_outside_query_context); } +bool withPageCache(const ReadSettings & settings, bool with_file_cache) +{ + return settings.page_cache && !with_file_cache && settings.use_page_cache_for_disks_without_file_cache; +} } namespace DB @@ -34,7 +39,7 @@ namespace ErrorCodes size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) { /// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task. 
- if (!withCache(settings)) + if (!withFileCache(settings)) return settings.remote_fs_buffer_size; /// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file. @@ -44,27 +49,30 @@ size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, + const std::string & cache_path_prefix_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_) - : ReadBufferFromFileBase( - use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading(settings_, getTotalSize(blobs_to_read_)), nullptr, 0) + : ReadBufferFromFileBase(use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading( + settings_, getTotalSize(blobs_to_read_)), nullptr, 0) , settings(settings_) , blobs_to_read(blobs_to_read_) , read_buffer_creator(std::move(read_buffer_creator_)) + , cache_path_prefix(cache_path_prefix_) , cache_log(settings.enable_filesystem_cache_log ? 
cache_log_ : nullptr) , query_id(CurrentThread::getQueryId()) , use_external_buffer(use_external_buffer_) - , with_cache(withCache(settings)) + , with_file_cache(withFileCache(settings)) + , with_page_cache(withPageCache(settings, with_file_cache)) , log(getLogger("ReadBufferFromRemoteFSGather")) { if (!blobs_to_read.empty()) current_object = blobs_to_read.front(); } -SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) +SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object, size_t start_offset) { - if (current_buf && !with_cache) + if (current_buf && !with_file_cache) { appendUncachedReadInfo(); } @@ -72,30 +80,45 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c current_object = object; const auto & object_path = object.remote_path; - size_t current_read_until_position = read_until_position ? read_until_position : object.bytes_size; - auto current_read_buffer_creator = [=, this]() { return read_buffer_creator(object_path, current_read_until_position); }; + std::unique_ptr buf; #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - if (with_cache) + if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); - return std::make_shared( + buf = std::make_unique( object_path, cache_key, settings.remote_fs_cache, FileCache::getCommonUser(), - std::move(current_read_buffer_creator), + [=, this]() { return read_buffer_creator(/* restricted_seek */true, object_path); }, settings, query_id, object.bytes_size, /* allow_seeks */false, /* use_external_buffer */true, - read_until_position ? std::optional(read_until_position) : std::nullopt, + /* read_until_position */std::nullopt, cache_log); } #endif - return current_read_buffer_creator(); + /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the + /// former doesn't support seeks. 
+ if (with_page_cache && !buf) + { + auto inner = read_buffer_creator(/* restricted_seek */false, object_path); + auto cache_key = FileChunkAddress { .path = cache_path_prefix + object_path }; + buf = std::make_unique( + cache_key, settings.page_cache, std::move(inner), settings); + } + + if (!buf) + buf = read_buffer_creator(/* restricted_seek */true, object_path); + + if (read_until_position > start_offset && read_until_position < start_offset + object.bytes_size) + buf->setReadUntilPosition(read_until_position - start_offset); + + return buf; } void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() @@ -124,12 +147,12 @@ void ReadBufferFromRemoteFSGather::initialize() return; /// One clickhouse file can be split into multiple files in remote fs. - auto current_buf_offset = file_offset_of_buffer_end; + size_t start_offset = 0; for (size_t i = 0; i < blobs_to_read.size(); ++i) { const auto & object = blobs_to_read[i]; - if (object.bytes_size > current_buf_offset) + if (start_offset + object.bytes_size > file_offset_of_buffer_end) { LOG_TEST(log, "Reading from file: {} ({})", object.remote_path, object.local_path); @@ -137,14 +160,14 @@ void ReadBufferFromRemoteFSGather::initialize() if (!current_buf || current_buf_idx != i) { current_buf_idx = i; - current_buf = createImplementationBuffer(object); + current_buf = createImplementationBuffer(object, start_offset); } - current_buf->seek(current_buf_offset, SEEK_SET); + current_buf->seek(file_offset_of_buffer_end - start_offset, SEEK_SET); return; } - current_buf_offset -= object.bytes_size; + start_offset += object.bytes_size; } current_buf_idx = blobs_to_read.size(); current_buf = nullptr; @@ -171,14 +194,14 @@ bool ReadBufferFromRemoteFSGather::nextImpl() bool ReadBufferFromRemoteFSGather::moveToNextBuffer() { /// If there is no available buffers - nothing to read. 
- if (current_buf_idx + 1 >= blobs_to_read.size()) + if (current_buf_idx + 1 >= blobs_to_read.size() || (read_until_position && file_offset_of_buffer_end >= read_until_position)) return false; ++current_buf_idx; const auto & object = blobs_to_read[current_buf_idx]; LOG_TEST(log, "Reading from next file: {} ({})", object.remote_path, object.local_path); - current_buf = createImplementationBuffer(object); + current_buf = createImplementationBuffer(object, file_offset_of_buffer_end); return true; } @@ -263,7 +286,7 @@ off_t ReadBufferFromRemoteFSGather::seek(off_t offset, int whence) ReadBufferFromRemoteFSGather::~ReadBufferFromRemoteFSGather() { - if (!with_cache) + if (!with_file_cache) appendUncachedReadInfo(); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index f6b7506a54f..8362b354e23 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -21,11 +21,12 @@ class ReadBufferFromRemoteFSGather final : public ReadBufferFromFileBase friend class ReadIndirectBufferFromRemoteFS; public: - using ReadBufferCreator = std::function(const std::string & path, size_t read_until_position)>; + using ReadBufferCreator = std::function(bool restricted_seek, const std::string & path)>; ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, + const std::string & cache_path_prefix_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_); @@ -53,7 +54,7 @@ public: bool isContentCached(size_t offset, size_t size) override; private: - SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object); + SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object, size_t start_offset); bool nextImpl() override; @@ -70,10 +71,12 @@ private: const ReadSettings settings; const StoredObjects blobs_to_read; const ReadBufferCreator read_buffer_creator; + const 
std::string cache_path_prefix; const std::shared_ptr cache_log; const String query_id; const bool use_external_buffer; - const bool with_cache; + const bool with_file_cache; + const bool with_page_cache; size_t read_until_position = 0; size_t file_offset_of_buffer_end = 0; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index f3caf62ffd5..590fc4c4656 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -152,6 +152,8 @@ IAsynchronousReader::Result ThreadPoolRemoteFSReader::execute(Request request, b IAsynchronousReader::Result read_result; if (result) { + chassert(reader.buffer().begin() == request.buf); + chassert(reader.buffer().end() <= request.buf + request.size); read_result.size = reader.buffer().size(); read_result.offset = reader.offset(); ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, read_result.size); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index abc251b2b10..eacce5a54ac 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -29,6 +29,9 @@ private: class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor { public: + /// `reader_` implementation must ensure that next() places data at the start of internal_buffer, + /// even if there was previously a seek. I.e. seek() shouldn't leave pending data (no short seek + /// optimization), and nextImpl() shouldn't assign nextimpl_working_buffer_offset. 
explicit RemoteFSFileDescriptor( SeekableReadBuffer & reader_, std::shared_ptr async_read_counters_) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index fba61c7c392..d3fb4b79237 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -206,7 +206,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL auto read_buffer_creator = [this, settings_ptr, disk_read_settings] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client.get(), @@ -215,8 +215,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL settings_ptr->max_single_read_retries, settings_ptr->max_single_download_retries, /* use_external_buffer */true, - /* restricted_seek */true, - read_until_position); + restricted_seek); }; switch (read_settings.remote_fs_method) @@ -226,16 +225,17 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL return std::make_unique( std::move(read_buffer_creator), objects, + "azure:", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); - } case RemoteFSReadMethod::threadpool: { auto impl = std::make_unique( std::move(read_buffer_creator), objects, + "azure:", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 2a648f28f14..16183ec20c1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -527,10 +527,9 @@ std::unique_ptr DiskObjectStorage::readFile( std::optional read_hint, std::optional file_size) const { - auto storage_objects = metadata_storage->getStorageObjects(path); + const auto 
storage_objects = metadata_storage->getStorageObjects(path); const bool file_can_be_empty = !file_size.has_value() || *file_size == 0; - if (storage_objects.empty() && file_can_be_empty) return std::make_unique(); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index fa5e227d853..f8545ecfe39 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -60,7 +60,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] - (const std::string & path, size_t /* read_until_position */) -> std::unique_ptr + (bool /* restricted_seek */, const std::string & path) -> std::unique_ptr { size_t begin_of_path = path.find('/', path.find("//") + 2); auto hdfs_path = path.substr(begin_of_path); @@ -71,7 +71,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI }; return std::make_unique( - std::move(read_buffer_creator), objects, disk_read_settings, nullptr, /* use_external_buffer */false); + std::move(read_buffer_creator), objects, "hdfs:", disk_read_settings, nullptr, /* use_external_buffer */false); } std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 02700b358e0..7fd4536f266 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -47,7 +47,7 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL auto modified_settings = patchSettings(read_settings); auto global_context = Context::getGlobalContextInstance(); auto read_buffer_creator = - [=] (const std::string & file_path, size_t /* read_until_position */) + [=] (bool /* restricted_seek */, const std::string & file_path) -> std::unique_ptr { return 
createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); @@ -58,13 +58,13 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL case RemoteFSReadMethod::read: { return std::make_unique( - std::move(read_buffer_creator), objects, modified_settings, + std::move(read_buffer_creator), objects, "file:", modified_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); } case RemoteFSReadMethod::threadpool: { auto impl = std::make_unique( - std::move(read_buffer_creator), objects, modified_settings, + std::move(read_buffer_creator), objects, "file:", modified_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5771eb1ebe0..d89c7c93e51 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -171,7 +171,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT auto read_buffer_creator = [this, settings_ptr, disk_read_settings] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client.get(), @@ -182,8 +182,8 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT disk_read_settings, /* use_external_buffer */true, /* offset */0, - read_until_position, - /* restricted_seek */true); + /* read_until_position */0, + restricted_seek); }; switch (read_settings.remote_fs_method) @@ -193,16 +193,17 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT return std::make_unique( std::move(read_buffer_creator), objects, + "s3:" + uri.bucket + "/", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); - } case RemoteFSReadMethod::threadpool: { auto 
impl = std::make_unique( std::move(read_buffer_creator), objects, + "s3:" + uri.bucket + "/", disk_read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 786b23caf48..48de0bf4168 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -252,14 +252,13 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT { auto read_buffer_creator = [this, read_settings] - (const std::string & path_, size_t read_until_position) -> std::unique_ptr + (bool /* restricted_seek */, const std::string & path_) -> std::unique_ptr { return std::make_unique( fs::path(url) / path_, getContext(), read_settings, - /* use_external_buffer */true, - read_until_position); + /* use_external_buffer */true); }; auto global_context = Context::getGlobalContextInstance(); @@ -271,6 +270,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT return std::make_unique( std::move(read_buffer_creator), StoredObjects{object}, + "url:" + url + "/", read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */false); @@ -280,6 +280,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT auto impl = std::make_unique( std::move(read_buffer_creator), StoredObjects{object}, + "url:" + url + "/", read_settings, global_context->getFilesystemCacheLog(), /* use_external_buffer */true); diff --git a/src/Formats/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h index 92f4a030a1a..06ed1476410 100644 --- a/src/Formats/MarkInCompressedFile.h +++ b/src/Formats/MarkInCompressedFile.h @@ -12,10 +12,8 @@ namespace DB /// It's a bug in clang with three-way comparison operator /// https://github.com/llvm/llvm-project/issues/55919 -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang 
diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" /** Mark is the position in the compressed file. The compressed file consists of adjacent compressed blocks. * Mark is a tuple - the offset in the file to the start of the compressed block, the offset in the decompressed block to the start of the data. @@ -41,9 +39,7 @@ struct MarkInCompressedFile } }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop /** * In-memory representation of an array of marks. diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 0533f3d419a..3efa40df9be 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -26,6 +26,7 @@ #include "config.h" + namespace DB { namespace ErrorCodes @@ -114,8 +115,6 @@ private: }; -class EmptyJSONStringSerializer{}; - class FunctionSQLJSONHelpers { @@ -156,25 +155,11 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (JSONPath) must be constant string"); } - const ColumnPtr & arg_jsonpath = json_path_column.column; - const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); - const auto * arg_jsonpath_string = typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); - - const ColumnPtr & arg_json = json_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); - - /// Get data and offsets for 1 argument (JSONPath) - const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars(); - const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets(); - /// Prepare to parse 1 argument (JSONPath) - const char * query_begin = reinterpret_cast(&chars_path[0]); - const char * query_end = query_begin + offsets_path[0] - 1; + String query = typeid_cast(*json_path_column.column).getValue(); - /// Tokenize query - Tokens tokens(query_begin, query_end); + /// Tokenize the query + Tokens tokens(query.data(), query.data() + query.size()); /// Max depth 0 indicates that depth is not limited IParser::Pos token_iterator(tokens, parse_depth); @@ -188,10 +173,6 @@ public: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unable to parse JSONPath"); } - /// Get data and offsets for 2 argument (JSON) - const ColumnString::Chars & chars_json = col_json_string->getChars(); - const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); - JSONParser json_parser; using Element = typename JSONParser::Element; Element document; @@ -200,10 +181,9 @@ public: /// Parse JSON for every row Impl impl; GeneratorJSONPath generator_json_path(res); - for (const auto i : collections::range(0, input_rows_count)) + for (size_t i = 0; i < input_rows_count; ++i) { - std::string_view json{ - reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + std::string_view json = json_column.column->getDataAt(i).toView(); document_ok = json_parser.parse(json, document); bool added_to_column = false; diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index d0edd34e657..79b33e2f75b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -9,10 +9,8 @@ #include "config.h" -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wused-but-marked-unused" 
-#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wused-but-marked-unused" #include #include @@ -1604,6 +1602,4 @@ using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; } -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp index a73347400c6..c9682b44b2c 100644 --- a/src/Functions/idna.cpp +++ b/src/Functions/idna.cpp @@ -6,16 +6,12 @@ #include #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wnewline-eof" -#endif -# include -# include -# include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnewline-eof" +#include +#include +#include +#pragma clang diagnostic pop namespace DB { @@ -199,4 +195,3 @@ Computes the Unicode representation of ASCII-encoded Internationalized Domain Na } #endif - diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 159189744bd..107302069b4 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -6,15 +6,11 @@ #include #include -#ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wnewline-eof" -#endif # include # include -#ifdef __clang__ # pragma clang diagnostic pop -#endif namespace DB { diff --git a/src/Functions/s2_fwd.h b/src/Functions/s2_fwd.h index 6e0b58ae118..4ed5d4fcc1b 100644 --- a/src/Functions/s2_fwd.h +++ b/src/Functions/s2_fwd.h @@ -1,8 +1,6 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wambiguous-reversed-operator" -#endif #include #include @@ -11,6 +9,4 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index e9276c4aefb..fbabc801913 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ 
b/src/Functions/seriesDecomposeSTL.cpp @@ -1,15 +1,9 @@ -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" #pragma clang diagnostic ignored "-Wshadow" #pragma clang diagnostic ignored "-Wimplicit-float-conversion" -#endif - #include - -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include #include diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index 61e3319d810..c01f6b7f07b 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -1,18 +1,14 @@ #include "config.h" #if USE_POCKETFFT -# ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wshadow" -# pragma clang diagnostic ignored "-Wextra-semi-stmt" -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -# endif +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wshadow" +# pragma clang diagnostic ignored "-Wextra-semi-stmt" +# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" # include -# ifdef __clang__ -# pragma clang diagnostic pop -# endif +# pragma clang diagnostic pop # include # include diff --git a/src/IO/Archives/ArchiveUtils.h b/src/IO/Archives/ArchiveUtils.h index 810b9d8d730..1b66be005a2 100644 --- a/src/IO/Archives/ArchiveUtils.h +++ b/src/IO/Archives/ArchiveUtils.h @@ -4,11 +4,9 @@ #if USE_LIBARCHIVE -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-macro-identifier" #include #include #endif -#endif diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 05d306011b0..815a7b2774e 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -54,6 +54,9 @@ public: struct Result { + /// The read data is at [buf + offset, buf + size), where `buf` is from Request struct. + /// (Notice that `offset` is included in `size`.) + /// size /// Less than requested amount of data can be returned. 
/// If size is zero - the file has ended. diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index 5a546c83580..e98f00270e2 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -60,6 +60,9 @@ public: BufferBase(Position ptr, size_t size, size_t offset) : pos(ptr + offset), working_buffer(ptr, ptr + size), internal_buffer(ptr, ptr + size) {} + /// Assign the buffers and pos. + /// Be careful when calling this from ReadBuffer::nextImpl() implementations: `offset` is + /// effectively ignored because ReadBuffer::next() reassigns `pos`. void set(Position ptr, size_t size, size_t offset) { internal_buffer = Buffer(ptr, ptr + size); diff --git a/src/IO/CachedInMemoryReadBufferFromFile.cpp b/src/IO/CachedInMemoryReadBufferFromFile.cpp new file mode 100644 index 00000000000..384d2229f14 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.cpp @@ -0,0 +1,188 @@ +#include "CachedInMemoryReadBufferFromFile.h" +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNEXPECTED_END_OF_FILE; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + +CachedInMemoryReadBufferFromFile::CachedInMemoryReadBufferFromFile( + FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_) + : ReadBufferFromFileBase(0, nullptr, 0, in_->getFileSize()), cache_key(cache_key_), cache(cache_), settings(settings_), in(std::move(in_)) + , read_until_position(file_size.value()) +{ + cache_key.offset = 0; +} + +String CachedInMemoryReadBufferFromFile::getFileName() const +{ + return in->getFileName(); +} + +off_t CachedInMemoryReadBufferFromFile::seek(off_t off, int whence) +{ + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); + + size_t offset = static_cast(off); + if (offset > file_size.value()) + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. 
Offset: {}", off); + + if (offset >= file_offset_of_buffer_end - working_buffer.size() && offset <= file_offset_of_buffer_end) + { + pos = working_buffer.end() - (file_offset_of_buffer_end - offset); + chassert(getPosition() == off); + return off; + } + + resetWorkingBuffer(); + + file_offset_of_buffer_end = offset; + chunk.reset(); + + chassert(getPosition() == off); + return off; +} + +off_t CachedInMemoryReadBufferFromFile::getPosition() +{ + return file_offset_of_buffer_end - available(); +} + +size_t CachedInMemoryReadBufferFromFile::getFileOffsetOfBufferEnd() const +{ + return file_offset_of_buffer_end; +} + +void CachedInMemoryReadBufferFromFile::setReadUntilPosition(size_t position) +{ + read_until_position = position; + if (position < static_cast(getPosition())) + { + resetWorkingBuffer(); + chunk.reset(); + } + else if (position < file_offset_of_buffer_end) + { + size_t diff = file_offset_of_buffer_end - position; + working_buffer.resize(working_buffer.size() - diff); + file_offset_of_buffer_end -= diff; + } +} + +void CachedInMemoryReadBufferFromFile::setReadUntilEnd() +{ + setReadUntilPosition(file_size.value()); +} + +bool CachedInMemoryReadBufferFromFile::nextImpl() +{ + chassert(read_until_position <= file_size.value()); + if (file_offset_of_buffer_end >= read_until_position) + return false; + + if (chunk.has_value() && file_offset_of_buffer_end >= cache_key.offset + cache->chunkSize()) + { + chassert(file_offset_of_buffer_end == cache_key.offset + cache->chunkSize()); + chunk.reset(); + } + + if (!chunk.has_value()) + { + cache_key.offset = file_offset_of_buffer_end / cache->chunkSize() * cache->chunkSize(); + chunk = cache->getOrSet(cache_key.hash(), settings.read_from_page_cache_if_exists_otherwise_bypass_cache, settings.page_cache_inject_eviction); + + size_t chunk_size = std::min(cache->chunkSize(), file_size.value() - cache_key.offset); + + std::unique_lock download_lock(chunk->getChunk()->state.download_mutex); + + if 
(!chunk->isPrefixPopulated(chunk_size)) + { + /// A few things could be improved here, which may or may not be worth the added complexity: + /// * If the next file chunk is in cache, use in->setReadUntilPosition() to limit the read to + /// just one chunk. More generally, look ahead in the cache to count how many next chunks + /// need to be downloaded. (Up to some limit? And avoid changing `in`'s until-position if + /// it's already reasonable; otherwise we'd increase it by one chunk every chunk, discarding + /// a half-completed HTTP request every time.) + /// * If only a subset of pages are missing from this chunk, download only them, + /// with some threshold for avoiding short seeks. + /// In particular, if a previous download failed in the middle of the chunk, we could + /// resume from that position instead of from the beginning of the chunk. + /// (It's also possible in principle that a proper subset of chunk's pages was reclaimed + /// by the OS. But, for performance purposes, we should completely ignore that, because + /// (a) PageCache normally uses 2 MiB transparent huge pages and has just one such page + /// per chunk, and (b) even with 4 KiB pages partial chunk eviction is extremely rare.) + /// * If our [position, read_until_position) covers only part of the chunk, we could download + /// just that part. (Which would be bad if someone else needs the rest of the chunk and has + /// to do a whole new HTTP request to get it. Unclear what the policy should be.) + /// * Instead of doing in->next() in a loop until we get the whole chunk, we could return the + /// results as soon as in->next() produces them. + /// (But this would make the download_mutex situation much more complex, similar to the + /// FileSegment::State::PARTIALLY_DOWNLOADED and FileSegment::setRemoteFileReader() stuff.) 
+ + Buffer prev_in_buffer = in->internalBuffer(); + SCOPE_EXIT({ in->set(prev_in_buffer.begin(), prev_in_buffer.size()); }); + + size_t pos = 0; + while (pos < chunk_size) + { + char * piece_start = chunk->getChunk()->data + pos; + size_t piece_size = chunk_size - pos; + in->set(piece_start, piece_size); + LOG_TEST(&Poco::Logger::get("CachedInMemoryReadBufferFromFile"), "filling chunk: path {}, file size {}, chunk offset {:x}, pos {:x}", cache_key.path, file_size.value(), cache_key.offset, pos); + if (pos == 0) + in->seek(cache_key.offset, SEEK_SET); + else + chassert(!in->available()); + + if (in->eof()) + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "File {} ended after {} bytes, but we expected {}", + getFileName(), cache_key.offset + pos, file_size.value()); + + chassert(in->position() >= piece_start && in->buffer().end() <= piece_start + piece_size); + chassert(in->getPosition() == static_cast(cache_key.offset + pos)); + + size_t n = in->available(); + chassert(n); + if (in->position() != piece_start) + memmove(piece_start, in->position(), n); + in->position() += n; + pos += n; + LOG_TEST(&Poco::Logger::get("CachedInMemoryReadBufferFromFile"), "read {} bytes into chunk", n); + } + + chunk->markPrefixPopulated(chunk_size); + } + } + + nextimpl_working_buffer_offset = file_offset_of_buffer_end - cache_key.offset; + working_buffer = Buffer( + chunk->getChunk()->data, + chunk->getChunk()->data + std::min(chunk->getChunk()->size, read_until_position - cache_key.offset)); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + + if (!internal_buffer.empty()) + { + /// We were given an external buffer to read into. Copy the data into it. + /// Would be nice to avoid this copy, somehow, maybe by making ReadBufferFromRemoteFSGather + /// and AsynchronousBoundedReadBuffer explicitly aware of the page cache.
+ size_t n = std::min(available(), internal_buffer.size()); + memcpy(internal_buffer.begin(), pos, n); + working_buffer = Buffer(internal_buffer.begin(), internal_buffer.begin() + n); + pos = working_buffer.begin(); + nextimpl_working_buffer_offset = 0; + } + + file_offset_of_buffer_end += available(); + + return true; +} + +} diff --git a/src/IO/CachedInMemoryReadBufferFromFile.h b/src/IO/CachedInMemoryReadBufferFromFile.h new file mode 100644 index 00000000000..300c2e82386 --- /dev/null +++ b/src/IO/CachedInMemoryReadBufferFromFile.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class CachedInMemoryReadBufferFromFile : public ReadBufferFromFileBase +{ +public: + /// `in_` must support using external buffer. I.e. we assign its internal_buffer before each next() + /// call and expect the read data to be put into that buffer. + /// `in_` should be seekable and should be able to read the whole file from 0 to in_->getFileSize(); + /// if you set `in_`'s read-until-position bypassing CachedInMemoryReadBufferFromFile then + /// CachedInMemoryReadBufferFromFile will break. 
+ CachedInMemoryReadBufferFromFile(FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr in_, const ReadSettings & settings_); + + String getFileName() const override; + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + size_t getFileOffsetOfBufferEnd() const override; + bool supportsRightBoundedReads() const override { return true; } + void setReadUntilPosition(size_t position) override; + void setReadUntilEnd() override; + +private: + FileChunkAddress cache_key; // .offset is offset of `chunk` start + PageCachePtr cache; + ReadSettings settings; + std::unique_ptr in; + + size_t file_offset_of_buffer_end = 0; + size_t read_until_position; + + std::optional chunk; + + bool nextImpl() override; +}; + +} diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 18cbe4e3a1d..45721da5248 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -1,17 +1,13 @@ #pragma once -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdouble-promotion" -#endif #include #include #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif namespace DB diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index b45bc8f3dbc..00325734354 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -225,11 +225,22 @@ public: * - seek() to a position above the until position (even if you setReadUntilPosition() to a * higher value right after the seek!), * - * Typical implementations discard any current buffers and connections, even if the position is - * adjusted only a little. 
+ * Implementations are recommended to: + * - Allow the read-until-position to go below current position, e.g.: + * // Read block [300, 400) + * setReadUntilPosition(400); + * seek(300); + * next(); + * // Read block [100, 200) + * setReadUntilPosition(200); // oh oh, this is below the current position, but should be allowed + * seek(100); // but now everything's fine again + * next(); + * // (Swapping the order of seek and setReadUntilPosition doesn't help: then it breaks if the order of blocks is reversed.) + * - Check if new read-until-position value is equal to the current value and do nothing in this case, + * so that the caller doesn't have to. * - * Typical usage is to call it right after creating the ReadBuffer, before it started doing any - * work. + * Typical implementations discard any current buffers and connections when the + * read-until-position changes even by a small (nonzero) amount. */ virtual void setReadUntilPosition(size_t /* position */) {} diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c397689d6ad..f4dc7880be4 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -61,6 +61,7 @@ enum class RemoteFSReadMethod }; class MMappedFileCache; +class PageCache; struct ReadSettings { @@ -102,6 +103,12 @@ struct ReadSettings bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_segments_batch_size = 20; + /// Userspace page cache settings (see PageCache). + bool use_page_cache_for_disks_without_file_cache = false; + bool read_from_page_cache_if_exists_otherwise_bypass_cache = false; + bool page_cache_inject_eviction = false; + std::shared_ptr page_cache; + size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; diff --git a/src/IO/S3/Requests.cpp b/src/IO/S3/Requests.cpp index 56d2e44a2c4..50ed2e21bfc 100644 --- a/src/IO/S3/Requests.cpp +++ b/src/IO/S3/Requests.cpp @@ -52,6 +52,20 @@ Aws::Http::HeaderValueCollection
CopyObjectRequest::GetRequestSpecificHeaders() return headers; } +void CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) +{ + // S3's CompleteMultipartUpload doesn't support metadata headers so we skip adding them + if (!headerName.starts_with("x-amz-meta-")) + Model::CompleteMultipartUploadRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + +void UploadPartRequest::SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) +{ + // S3's UploadPart doesn't support metadata headers so we skip adding them + if (!headerName.starts_with("x-amz-meta-")) + Model::UploadPartRequest::SetAdditionalCustomHeaderValue(headerName, headerValue); +} + Aws::String ComposeObjectRequest::SerializePayload() const { if (component_names.empty()) @@ -70,6 +84,7 @@ Aws::String ComposeObjectRequest::SerializePayload() const return payload_doc.ConvertToString(); } + void ComposeObjectRequest::AddQueryStringParameters(Aws::Http::URI & /*uri*/) const { } diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index bfb94a5a67e..7b4c3698f10 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -107,10 +107,20 @@ using ListObjectsV2Request = ExtendedRequest; using ListObjectsRequest = ExtendedRequest; using GetObjectRequest = ExtendedRequest; +class UploadPartRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; + +class CompleteMultipartUploadRequest : public ExtendedRequest +{ +public: + void SetAdditionalCustomHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override; +}; + using CreateMultipartUploadRequest = ExtendedRequest; -using CompleteMultipartUploadRequest = ExtendedRequest; using AbortMultipartUploadRequest = ExtendedRequest; -using UploadPartRequest = ExtendedRequest; using UploadPartCopyRequest = ExtendedRequest; 
using PutObjectRequest = ExtendedRequest; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index f438990fd1c..8b743e6351b 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -39,15 +39,11 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-parameter" #pragma clang diagnostic ignored "-Wsign-compare" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif #include diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 51964636389..597f0a06fb9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -6,14 +6,10 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" -#endif #include -#ifdef __clang__ #pragma clang diagnostic pop -#endif /** Methods for reading floating point numbers from text with decimal representation. * There are "precise", "fast" and "simple" implementations. diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ddfb01d2403..ad2a8cd65b5 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -294,6 +295,7 @@ struct ContextSharedPart : boost::noncopyable mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices. mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. AsynchronousMetrics * asynchronous_metrics TSA_GUARDED_BY(mutex) = nullptr; /// Points to asynchronous metrics + mutable PageCachePtr page_cache TSA_GUARDED_BY(mutex); /// Userspace page cache. ProcessList process_list; /// Executing queries at the moment. 
SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; @@ -1251,7 +1253,7 @@ void Context::setUser(const UUID & user_id_, const std::optional() and other AccessControl's functions may require some IO work, - /// so Context::getLock() must be unlocked while we're doing this. + /// so Context::getLocalLock() and Context::getGlobalLock() must be unlocked while we're doing this. auto & access_control = getAccessControl(); auto user = access_control.read(user_id_); @@ -1381,7 +1383,7 @@ void Context::checkAccess(const AccessRightsElements & elements) const { return std::shared_ptr Context::getAccess() const { - /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired. + /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() { /// If setUserID() was never called then this must be the global context with the full access. @@ -1408,7 +1410,8 @@ std::shared_ptr Context::getAccess() const } /// Calculate new access rights according to the collected parameters. - /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLocalLock() + /// and Context::getGlobalLock() must be unlocked while we're doing this. 
auto res = getAccessControl().getContextAccess(*params); { @@ -2737,6 +2740,33 @@ void Context::clearUncompressedCache() const shared->uncompressed_cache->clear(); } +void Context::setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages) +{ + std::lock_guard lock(shared->mutex); + + if (shared->page_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Page cache has been already created."); + + shared->page_cache = std::make_shared(bytes_per_chunk, bytes_per_mmap, bytes_total, use_madv_free, use_huge_pages); +} + +PageCachePtr Context::getPageCache() const +{ + SharedLockGuard lock(shared->mutex); + return shared->page_cache; +} + +void Context::dropPageCache() const +{ + PageCachePtr cache; + { + SharedLockGuard lock(shared->mutex); + cache = shared->page_cache; + } + if (cache) + cache->dropCache(); +} + void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { std::lock_guard lock(shared->mutex); @@ -5153,6 +5183,11 @@ ReadSettings Context::getReadSettings() const res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; + res.page_cache = getPageCache(); + res.use_page_cache_for_disks_without_file_cache = settings.use_page_cache_for_disks_without_file_cache; + res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings.read_from_page_cache_if_exists_otherwise_bypass_cache; + res.page_cache_inject_eviction = settings.page_cache_inject_eviction; + res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; /// Zero read buffer will not make progress. 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 28ebbdb8d81..b2310eaa85d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -79,6 +79,7 @@ class RefreshSet; class Cluster; class Compiler; class MarkCache; +class PageCache; class MMappedFileCache; class UncompressedCache; class ProcessList; @@ -969,6 +970,10 @@ public: std::shared_ptr getUncompressedCache() const; void clearUncompressedCache() const; + void setPageCache(size_t bytes_per_chunk, size_t bytes_per_mmap, size_t bytes_total, bool use_madv_free, bool use_huge_pages); + std::shared_ptr getPageCache() const; + void dropPageCache() const; + void setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); void updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getMarkCache() const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 73487a0914a..77452936fbd 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1541,7 +1541,7 @@ size_t joinRightColumnsSwitchNullability( } else { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 86fd5b96ccd..eaa256a16b0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -780,13 +780,30 @@ InterpreterSelectQuery::InterpreterSelectQuery( result_header = getSampleBlockImpl(); }; + + /// This is a hack to make sure we reanalyze if GlobalSubqueriesVisitor changed allow_experimental_parallel_reading_from_replicas + /// inside the query context (because it doesn't have write access to the main context) + UInt64 parallel_replicas_before_analysis + = 
context->hasQueryContext() ? context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas : 0; + /// Conditionally support AST-based PREWHERE optimization. analyze(shouldMoveToPrewhere() && (!settings.query_plan_optimize_prewhere || !settings.query_plan_enable_optimizations)); + bool need_analyze_again = false; bool can_analyze_again = false; + if (context->hasQueryContext()) { + /// As this query can't be executed with parallel replicas, we must reanalyze it + if (context->getQueryContext()->getSettingsRef().allow_experimental_parallel_reading_from_replicas + != parallel_replicas_before_analysis) + { + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("max_parallel_replicas", UInt64{0}); + need_analyze_again = true; + } + /// Check number of calls of 'analyze' function. /// If it is too big, we will not analyze the query again not to have exponential blowup. std::atomic & current_query_analyze_count = context->getQueryContext()->kitchen_sink.analyze_counter; @@ -875,7 +892,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { /// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas to be able to use a trivial count optimization"); return true; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 292c538c8c7..4bb47a8c9e3 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -460,6 +461,13 @@ BlockIO InterpreterSystemQuery::execute() { throw 
Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); } + case Type::DROP_PAGE_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_PAGE_CACHE); + + getContext()->dropPageCache(); + break; + } case Type::DROP_SCHEMA_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE); @@ -1201,6 +1209,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_INDEX_UNCOMPRESSED_CACHE: case Type::DROP_FILESYSTEM_CACHE: case Type::SYNC_FILESYSTEM_CACHE: + case Type::DROP_PAGE_CACHE: case Type::DROP_SCHEMA_CACHE: case Type::DROP_FORMAT_SCHEMA_CACHE: case Type::DROP_S3_CLIENT_CACHE: @@ -1209,12 +1218,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_DISK_METADATA_CACHE: - case Type::DROP_DISTRIBUTED_CACHE: - case Type::STOP_VIRTUAL_PARTS_UPDATE: - case Type::START_VIRTUAL_PARTS_UPDATE: - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only available in ClickHouse Cloud, https://clickhouse.cloud/"); - } + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); case Type::RELOAD_DICTIONARY: case Type::RELOAD_DICTIONARIES: case Type::RELOAD_EMBEDDED_DICTIONARIES: diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index bdf314f35b9..fe7ccd64ffe 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -77,6 +79,16 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; } + if (auto page_cache = getContext()->getPageCache()) + { + auto rss = page_cache->getResidentSetSize(); + new_values["PageCacheBytes"] = { rss.page_cache_rss, "Userspace page cache memory usage in bytes" }; + new_values["PageCachePinnedBytes"] = { 
page_cache->getPinnedSize(), "Userspace page cache memory that's currently in use and can't be evicted" }; + + if (rss.unreclaimable_rss.has_value()) + new_values["UnreclaimableRSS"] = { *rss.unreclaimable_rss, "The amount of physical memory used by the server process, in bytes, excluding memory reclaimable by the OS (MADV_FREE)" }; + } + if (auto uncompressed_cache = getContext()->getUncompressedCache()) { new_values["UncompressedCacheBytes"] = { uncompressed_cache->sizeInBytes(), diff --git a/src/Interpreters/examples/hash_map_string.cpp b/src/Interpreters/examples/hash_map_string.cpp index f3ec104a5f7..f30a9a4cac1 100644 --- a/src/Interpreters/examples/hash_map_string.cpp +++ b/src/Interpreters/examples/hash_map_string.cpp @@ -20,9 +20,7 @@ #include #include -#if defined(__clang__) - #pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#endif +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" struct CompactStringRef diff --git a/src/Interpreters/tests/gtest_page_cache.cpp b/src/Interpreters/tests/gtest_page_cache.cpp new file mode 100644 index 00000000000..1e2688c0ca2 --- /dev/null +++ b/src/Interpreters/tests/gtest_page_cache.cpp @@ -0,0 +1,267 @@ +#include +#include +#include + +#ifdef OS_LINUX +#include +#endif + +using namespace DB; + +namespace ProfileEvents +{ + extern const Event PageCacheChunkMisses; + extern const Event PageCacheChunkShared; + extern const Event PageCacheChunkDataHits; + extern const Event PageCacheChunkDataPartialHits; + extern const Event PageCacheChunkDataMisses; +} + +#define CHECK(x) \ + do { \ + if (!(x)) \ + { \ + std::cerr << "check on line " << __LINE__ << " failed: " << #x << std::endl; \ + std::abort(); \ + } \ + } while (false) + +size_t estimateRAMSize() +{ +#ifdef OS_LINUX + struct sysinfo info; + int r = sysinfo(&info); + CHECK(r == 0); + return static_cast(info.totalram * info.mem_unit); +#else + return 128ul << 30; +#endif +} + +/// Do random reads and writes in PageCache from multiple threads, check that 
the data read matches the data written. +TEST(PageCache, DISABLED_Stress) +{ + /// There doesn't seem to be a reasonable way to simulate memory pressure or force the eviction of MADV_FREE-d pages. + /// So we actually map more virtual memory than we have RAM and fill it all up a few times. + /// This takes an eternity (a few minutes), but idk how else to hit MADV_FREE eviction. + /// Expect ~1 GB/s, bottlenecked by page faults. + size_t ram_size = estimateRAMSize(); + PageCache cache(2 << 20, 1 << 30, ram_size + ram_size / 10, /* use_madv_free */ true, /* use_huge_pages */ true); + + CHECK(cache.getResidentSetSize().page_cache_rss); + + const size_t num_keys = static_cast(cache.maxChunks() * 1.5); + const size_t pages_per_chunk = cache.chunkSize() / cache.pageSize(); + const size_t items_per_page = cache.pageSize() / 8; + + const size_t passes = 2; + const size_t step = 20; + const size_t num_threads = 20; + const size_t chunks_touched = num_keys * passes * num_threads / step; + std::atomic progress {0}; + std::atomic threads_finished {0}; + + std::atomic total_racing_writes {0}; + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + std::vector pinned; + + /// Stats. + size_t racing_writes = 0; + + for (size_t i = 0; i < num_keys * passes; i += step) + { + progress += 1; + + /// Touch the chunks sequentially + noise (to increase interference across threads), or at random 10% of the time. + size_t key_idx; + if (rng() % 10 == 0) + key_idx = std::uniform_int_distribution(0, num_keys - 1)(rng); + else + key_idx = (i + std::uniform_int_distribution(0, num_keys / 1000)(rng)) % num_keys; + + /// For some keys, always use detached_if_missing = true and check that cache always misses. 
+ bool key_detached_if_missing = key_idx % 100 == 42; + bool detached_if_missing = key_detached_if_missing || i % 101 == 42; + + PageCacheKey key = key_idx * 0xcafebabeb0bad00dul; // a simple reversible hash (the constant can be any odd number) + + PinnedPageChunk chunk = cache.getOrSet(key, detached_if_missing, /* inject_eviction */ false); + + if (key_detached_if_missing) + CHECK(!chunk.getChunk()->pages_populated.any()); + + for (size_t page_idx = 0; page_idx < pages_per_chunk; ++page_idx) + { + bool populated = chunk.getChunk()->pages_populated.get(page_idx); + /// Generate page contents deterministically from key and page index. + size_t start = key_idx * page_idx; + if (start % 37 == 13) + { + /// Leave ~1/37 of the pages unpopulated. + CHECK(!populated); + } + else + { + /// We may write/read the same memory from multiple threads in parallel here. + std::atomic * items = reinterpret_cast *>(chunk.getChunk()->data + cache.pageSize() * page_idx); + if (populated) + { + for (size_t j = 0; j < items_per_page; ++j) + CHECK(items[j].load(std::memory_order_relaxed) == start + j); + } + else + { + for (size_t j = 0; j < items_per_page; ++j) + items[j].store(start + j, std::memory_order_relaxed); + if (!chunk.markPagePopulated(page_idx)) + racing_writes += 1; + } + } + } + + pinned.push_back(std::move(chunk)); + CHECK(cache.getPinnedSize() >= cache.chunkSize()); + /// Unpin 2 chunks on average. + while (rng() % 3 != 0 && !pinned.empty()) + { + size_t idx = rng() % pinned.size(); + if (idx != pinned.size() - 1) + pinned[idx] = std::move(pinned.back()); + pinned.pop_back(); + } + } + + total_racing_writes += racing_writes; + threads_finished += 1; + }; + + std::cout << fmt::format("doing {:.1f} passes over {:.1f} GiB of virtual memory\nthis will take a few minutes, progress printed every 10 seconds", + chunks_touched * 1. / cache.maxChunks(), cache.maxChunks() * cache.chunkSize() * 1. 
/ (1ul << 30)) << std::endl; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (size_t poll = 0;; ++poll) + { + if (threads_finished == num_threads) + break; + if (poll % 100 == 0) + std::cout << fmt::format("{:.3f}%", progress.load() * 100. / num_keys / passes / num_threads * step) << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double touched_gib = chunks_touched * cache.chunkSize() * 1. / (1ul << 30); + std::cout << fmt::format("touched {:.1f} GiB in {:.1f} seconds, that's {:.3f} GiB/s", + touched_gib, elapsed_seconds, touched_gib / elapsed_seconds) << std::endl; + + auto & counters = CurrentThread::getProfileEvents(); + + std::cout << "stats:" + << "\nchunk misses: " << counters[ProfileEvents::PageCacheChunkMisses].load() + << "\nchunk shared: " << counters[ProfileEvents::PageCacheChunkShared].load() + << "\nchunk data misses: " << counters[ProfileEvents::PageCacheChunkDataMisses].load() + << "\nchunk data partial hits: " << counters[ProfileEvents::PageCacheChunkDataPartialHits].load() + << "\nchunk data hits: " << counters[ProfileEvents::PageCacheChunkDataHits].load() + << "\nracing page writes: " << total_racing_writes << std::endl; + + /// Check that we at least hit all the cases. + CHECK(counters[ProfileEvents::PageCacheChunkMisses].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkShared].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataMisses].load() > 0); + /// Partial hits are rare enough that sometimes this is zero, so don't check it. + /// That's good news because we don't need to implement downloading parts of a chunk. 
+ /// CHECK(counters[ProfileEvents::PageCacheChunkDataPartialHits].load() > 0); + CHECK(counters[ProfileEvents::PageCacheChunkDataHits].load() > 0); + CHECK(total_racing_writes > 0); + CHECK(cache.getPinnedSize() == 0); + + size_t rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS: " << rss * 1. / (1ul << 30) << " GiB" << std::endl; + /// This can be flaky if the system has < 10% free memory. If this turns out to be a problem, feel free to remove or reduce. + CHECK(rss > ram_size / 10); + + cache.dropCache(); + +#ifdef OS_LINUX + /// MADV_DONTNEED is not synchronous, and we're freeing lots of pages. Let's give Linux a lot of time. + std::this_thread::sleep_for(std::chrono::seconds(10)); + size_t new_rss = cache.getResidentSetSize().page_cache_rss; + std::cout << "RSS after dropping cache: " << new_rss * 1. / (1ul << 30) << " GiB" << std::endl; + CHECK(new_rss < rss / 2); +#endif +} + +/// Benchmark that measures the PageCache overhead for cache hits. Doesn't touch the actual data, so +/// memory bandwidth mostly doesn't factor into this. +/// This measures the overhead of things like madvise(MADV_FREE) and probing the pages (restoreChunkFromLimbo()). +/// Disabled in CI, run manually with --gtest_also_run_disabled_tests --gtest_filter=PageCache.DISABLED_HitsBench +TEST(PageCache, DISABLED_HitsBench) +{ + /// Do a few runs, with and without MADV_FREE. + for (size_t num_threads = 1; num_threads <= 16; num_threads *= 2) + { + for (size_t run = 0; run < 8; ++ run) + { + bool use_madv_free = run % 2 == 1; + bool use_huge_pages = run % 4 / 2 == 1; + + PageCache cache(2 << 20, 1ul << 30, 20ul << 30, use_madv_free, use_huge_pages); + size_t passes = 3; + std::atomic total_misses {0}; + + /// Prepopulate all chunks. 
+ for (size_t i = 0; i < cache.maxChunks(); ++i) + { + PageCacheKey key = i * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + memset(chunk.getChunk()->data, 42, chunk.getChunk()->size); + chunk.markPrefixPopulated(cache.chunkSize()); + } + + auto thread_func = [&] + { + pcg64 rng(randomSeed()); + size_t misses = 0; + for (size_t i = 0; i < cache.maxChunks() * passes; ++i) + { + PageCacheKey key = rng() % cache.maxChunks() * 0xcafebabeb0bad00dul; + PinnedPageChunk chunk = cache.getOrSet(key, /* detache_if_missing */ false, /* inject_eviction */ false); + if (!chunk.isPrefixPopulated(cache.chunkSize())) + misses += 1; + } + total_misses += misses; + }; + + auto start_time = std::chrono::steady_clock::now(); + + std::vector threads; + for (size_t i = 0; i < num_threads; ++i) + threads.emplace_back(thread_func); + + for (std::thread & t : threads) + t.join(); + + auto end_time = std::chrono::steady_clock::now(); + double elapsed_seconds = std::chrono::duration_cast>(end_time - start_time).count(); + double fetched_gib = cache.chunkSize() * cache.maxChunks() * passes * 1. / (1ul << 30); + std::cout << fmt::format( + "threads {}, run {}, use_madv_free = {}, use_huge_pages = {}\nrequested {:.1f} GiB in {:.1f} seconds\n" + "that's {:.1f} GiB/s, or overhead of {:.3}us/{:.1}MiB\n", + num_threads, run, use_madv_free, use_huge_pages, fetched_gib, elapsed_seconds, fetched_gib / elapsed_seconds, + elapsed_seconds * 1e6 / cache.maxChunks() / passes, cache.chunkSize() * 1. / (1 << 20)) << std::endl; + + if (total_misses != 0) + std::cout << "!got " << total_misses.load() << " misses! 
perhaps your system doesn't have enough free memory, consider decreasing cache size in the benchmark code" << std::endl; + } + } +} diff --git a/src/NOTICE b/src/NOTICE index c68280b1529..4e5f66c65c9 100644 --- a/src/NOTICE +++ b/src/NOTICE @@ -13,18 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Common/ErrorCodes.cpp -Common/UInt128.h -Core/Block.h -Core/Defines.h -Core/Settings.h -Databases/DatabasesCommon.cpp -IO/WriteBufferValidUTF8.cpp -Interpreters/InterpreterAlterQuery.cpp Interpreters/InterpreterCreateQuery.cpp Interpreters/InterpreterFactory.cpp Parsers/ASTAlterQuery.cpp -Parsers/ASTAlterQuery.h Parsers/ASTCreateQuery.cpp Parsers/ASTCreateQuery.h Parsers/ParserAlterQuery.cpp diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 605cc4ade42..a93ad1d1746 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -60,8 +60,6 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); - if (values) - res->values = res->children.emplace_back(values->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -518,7 +516,6 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); - f(reinterpret_cast(&values)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 867ebf26194..1799b75fce4 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -166,9 +166,6 @@ public: /// For MODIFY_SQL_SECURITY IAST * sql_security = nullptr; - /// In ALTER CHANNEL, ADD, DROP, SUSPEND, RESUME, REFRESH, MODIFY queries, the 
list of live views is stored here - IAST * values = nullptr; - /// Target column name IAST * rename_to = nullptr; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index e8ccb8e9377..1315ea5784c 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -348,13 +348,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") << quoteString(*attach_from_path); - if (live_view_periodic_refresh) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "") - << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "") - << *live_view_periodic_refresh; - } - formatOnCluster(settings); } else diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index aeb84d754e3..64e6bc8ce48 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -122,7 +122,6 @@ public: ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) ASTRefreshStrategy * refresh_strategy = nullptr; // For CREATE MATERIALIZED VIEW ... REFRESH ... - std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... 
bool is_watermark_strictly_ascending{false}; /// STRICTLY ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW bool is_watermark_ascending{false}; /// ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 66f949ae3b5..63311a70e42 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -172,8 +172,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: - case Type::START_VIRTUAL_PARTS_UPDATE: - case Type::STOP_VIRTUAL_PARTS_UPDATE: { if (table) { @@ -296,12 +294,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s } break; } - case Type::DROP_DISTRIBUTED_CACHE: - { - if (!distributed_cache_servive_id.empty()) - settings.ostr << (settings.hilite ? hilite_none : "") << " " << distributed_cache_servive_id; - break; - } case Type::UNFREEZE: { print_keyword(" WITH NAME "); @@ -423,6 +415,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::STOP_THREAD_FUZZER: case Type::START_VIEWS: case Type::STOP_VIEWS: + case Type::DROP_PAGE_CACHE: break; case Type::UNKNOWN: case Type::END: diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index b6fa790315e..48be7f6b84f 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -30,8 +30,8 @@ public: DROP_QUERY_CACHE, DROP_COMPILED_EXPRESSION_CACHE, DROP_FILESYSTEM_CACHE, - DROP_DISTRIBUTED_CACHE, DROP_DISK_METADATA_CACHE, + DROP_PAGE_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, DROP_S3_CLIENT_CACHE, @@ -99,8 +99,6 @@ public: STOP_VIEWS, CANCEL_VIEW, TEST_VIEW, - STOP_VIRTUAL_PARTS_UPDATE, - START_VIRTUAL_PARTS_UPDATE, END }; @@ -129,8 +127,6 @@ public: UInt64 seconds{}; String filesystem_cache_name; - String distributed_cache_servive_id; - std::string key_to_drop; std::optional offset_to_drop; 
diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 6dba5a9c31f..235d5782630 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -9,10 +9,8 @@ #include #include -#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wc99-extensions" -#endif namespace DB { @@ -297,6 +295,4 @@ protected: } -#ifdef __clang__ #pragma clang diagnostic pop -#endif diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 495e91b96d5..b1cc7622e00 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -138,7 +138,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserList parser_reset_setting( std::make_unique(), std::make_unique(TokenType::Comma), /* allow_empty = */ false); - ParserNameList values_p; ParserSelectWithUnionQuery select_p; ParserSQLSecurity sql_security_p; ParserRefreshStrategy refresh_p; @@ -163,7 +162,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr command_settings_changes; ASTPtr command_settings_resets; ASTPtr command_select; - ASTPtr command_values; ASTPtr command_rename_to; ASTPtr command_sql_security; @@ -944,8 +942,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->settings_resets = command->children.emplace_back(std::move(command_settings_resets)).get(); if (command_select) command->select = command->children.emplace_back(std::move(command_select)).get(); - if (command_values) - command->values = command->children.emplace_back(std::move(command_values)).get(); if (command_sql_security) command->sql_security = command->children.emplace_back(std::move(command_sql_security)).get(); if (command_rename_to) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 3c86ed6b518..8ebadf4606f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ 
b/src/Parsers/ParserCreateQuery.cpp @@ -917,15 +917,11 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr as_database; ASTPtr as_table; ASTPtr select; - ASTPtr live_view_periodic_refresh; ASTPtr sql_security; String cluster_str; bool attach = false; bool if_not_exists = false; - bool with_and = false; - bool with_timeout = false; - bool with_periodic_refresh = false; if (!s_create.ignore(pos, expected)) { @@ -949,23 +945,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!table_name_p.parse(pos, table, expected)) return false; - if (ParserKeyword{"WITH"}.ignore(pos, expected)) - { - if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected)) - { - if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected)) - live_view_periodic_refresh = std::make_shared(static_cast(60)); - - with_periodic_refresh = true; - } - - else if (with_and) - return false; - - if (!with_timeout && !with_periodic_refresh) - return false; - } - if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) @@ -1028,9 +1007,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); - if (live_view_periodic_refresh) - query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as().value.safeGet()); - if (comment) query->set(query->comment, comment); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index facf1f8b820..a50e65aa134 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -14,6 +14,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + [[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool 
require_table, bool allow_string_literal) { @@ -392,8 +397,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_PULLING_REPLICATION_LOG: case Type::STOP_CLEANUP: case Type::START_CLEANUP: - case Type::STOP_VIRTUAL_PARTS_UPDATE: - case Type::START_VIRTUAL_PARTS_UPDATE: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -467,15 +470,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } - case Type::DROP_DISTRIBUTED_CACHE: - { - ParserLiteral parser; - ASTPtr ast; - if (!parser.parse(pos, ast, expected)) - return false; - res->distributed_cache_servive_id = ast->as()->value.safeGet(); - break; - } case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; @@ -488,9 +482,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::DROP_DISK_METADATA_CACHE: { - if (!parseQueryWithOnClusterAndTarget(res, pos, expected, SystemQueryTargetType::Disk)) - return false; - break; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); } case Type::DROP_SCHEMA_CACHE: { diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 345bec395b2..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -23,7 +23,6 @@ struct PullingAsyncPipelineExecutor::Data std::atomic_bool is_finished = false; std::atomic_bool has_exception = false; ThreadFromGlobalPool thread; - Poco::Event finish_event; ~Data() { @@ -89,12 +88,10 @@ static void threadFunction( data.has_exception = true; /// Finish lazy format in case of exception. Otherwise thread.join() may hung. 
- if (data.lazy_format) - data.lazy_format->finalize(); + data.lazy_format->finalize(); } data.is_finished = true; - data.finish_event.set(); } @@ -129,20 +126,8 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) return false; } - if (lazy_format) - { - chunk = lazy_format->getChunk(milliseconds); - data->rethrowExceptionIfHas(); - return true; - } - - chunk.clear(); - - if (milliseconds) - data->finish_event.tryWait(milliseconds); - else - data->finish_event.wait(); - + chunk = lazy_format->getChunk(milliseconds); + data->rethrowExceptionIfHas(); return true; } @@ -230,14 +215,12 @@ void PullingAsyncPipelineExecutor::cancelWithExceptionHandling(CancelFunc && can Chunk PullingAsyncPipelineExecutor::getTotals() { - return lazy_format ? lazy_format->getTotals() - : Chunk(); + return lazy_format->getTotals(); } Chunk PullingAsyncPipelineExecutor::getExtremes() { - return lazy_format ? lazy_format->getExtremes() - : Chunk(); + return lazy_format->getExtremes(); } Block PullingAsyncPipelineExecutor::getTotalsBlock() @@ -264,15 +247,7 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() ProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { - if (lazy_format) - return lazy_format->getProfileInfo(); - - static ProfileInfo profile_info; - static std::once_flag flag; - /// Calculate rows before limit here to avoid race. 
- std::call_once(flag, []() { profile_info.getRowsBeforeLimit(); }); - - return profile_info; + return lazy_format->getProfileInfo(); } } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.h b/src/Processors/Executors/PushingAsyncPipelineExecutor.h index 4b4b83a90b5..f976cd4c339 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include namespace DB diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 2f011567b90..958e0a326cf 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -57,7 +57,7 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int TABLE_WAS_NOT_DROPPED; - extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW; + extern const int NOT_IMPLEMENTED; extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; } @@ -86,14 +86,14 @@ SelectQueryDescription buildSelectQueryDescription(const ASTPtr & select_query, if (inner_select_with_union_query) { if (inner_select_with_union_query->list_of_selects->children.size() != 1) - throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "UNION is not supported for LIVE VIEW"); inner_query = inner_select_with_union_query->list_of_selects->children[0]; } auto * inner_select_query = inner_query->as(); if (!inner_select_query) - throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "LIVE VIEWs are only supported for queries from tables, " "but there is no table name in select query."); @@ -226,29 +226,9 @@ StorageLiveView::StorageLiveView( DatabaseCatalog::instance().addViewDependency(select_query_description.select_table_id, table_id_); - if (query.live_view_periodic_refresh) - { - 
is_periodically_refreshed = true; - periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh}; - } - blocks_ptr = std::make_shared(); blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); - - periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", - [this] - { - try - { - periodicRefreshTaskFunc(); - } - catch (...) - { - tryLogCurrentException(log, "Exception in LiveView periodic refresh task in BackgroundSchedulePool"); - } - }); - periodic_refresh_task->deactivate(); } StorageLiveView::~StorageLiveView() @@ -285,17 +265,12 @@ void StorageLiveView::drop() void StorageLiveView::startup() { - if (is_periodically_refreshed) - periodic_refresh_task->activate(); } void StorageLiveView::shutdown(bool) { shutdown_called = true; - if (is_periodically_refreshed) - periodic_refresh_task->deactivate(); - DatabaseCatalog::instance().removeViewDependency(select_query_description.select_table_id, getStorageID()); } @@ -311,17 +286,7 @@ Pipe StorageLiveView::read( std::lock_guard lock(mutex); if (!(*blocks_ptr)) - { refreshImpl(lock); - } - else if (is_periodically_refreshed) - { - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if ((current_time - periodic_live_view_refresh) >= blocks_time) - refreshImpl(lock); - } return Pipe(std::make_shared(*blocks_ptr, getHeader())); } @@ -362,9 +327,6 @@ Pipe StorageLiveView::watch( if (!(*blocks_ptr)) refreshImpl(lock); - - if (is_periodically_refreshed) - scheduleNextPeriodicRefresh(lock); } processed_stage = QueryProcessingStage::Complete; @@ -746,39 +708,6 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) return updated; } -void StorageLiveView::periodicRefreshTaskFunc() -{ - LOG_TRACE(log, "periodic refresh task"); - - std::lock_guard lock(mutex); - - if (hasActiveUsers(lock)) - 
scheduleNextPeriodicRefresh(lock); -} - -void StorageLiveView::scheduleNextPeriodicRefresh(const std::lock_guard & lock) -{ - Seconds current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - Seconds blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - - if ((current_time - periodic_live_view_refresh) >= blocks_time) - { - refreshImpl(lock); - blocks_time = std::chrono::duration_cast(getBlocksTime(lock).time_since_epoch()); - } - current_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - - auto next_refresh_time = blocks_time + periodic_live_view_refresh; - - if (current_time >= next_refresh_time) - periodic_refresh_task->scheduleAfter(0); - else - { - auto schedule_time = std::chrono::duration_cast (next_refresh_time - current_time); - periodic_refresh_task->scheduleAfter(static_cast(schedule_time.count())); - } -} - void registerStorageLiveView(StorageFactory & factory) { factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 6b8780cb81b..bf6b13fc837 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -21,6 +21,7 @@ limitations under the License. 
*/ namespace DB { +using BlocksPtrs = std::shared_ptr>; struct BlocksMetadata { @@ -172,11 +173,6 @@ private: /// Read new data blocks that store query result bool getNewBlocks(const std::lock_guard & lock); - void periodicRefreshTaskFunc(); - - /// Must be called with mutex locked - void scheduleNextPeriodicRefresh(const std::lock_guard & lock); - SelectQueryDescription select_query_description; /// Query over the mergeable blocks to produce final result @@ -186,9 +182,6 @@ private: LoggerPtr log; - bool is_periodically_refreshed = false; - Seconds periodic_live_view_refresh; - /// Mutex to protect access to sample block and inner_blocks_query mutable std::mutex sample_block_lock; mutable Block sample_block; @@ -208,9 +201,6 @@ private: MergeableBlocksPtr mergeable_blocks; std::atomic shutdown_called = false; - - /// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement - BackgroundSchedulePool::TaskHolder periodic_refresh_task; }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 39ad28d3dae..a9bdceacef0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1637,10 +1637,6 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME); auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); - /// FIXME https://github.com/ClickHouse/ClickHouse/issues/48465 - if (dynamic_cast(buf.get())) - return true; - readStringUntilEOF(content, *buf); ReadBufferFromString str_buf{content}; VersionMetadata file; diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index 8b06da5167e..5096ee86db1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ 
b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -26,10 +26,8 @@ static E parseEnum(const String & str) /// It's a bug in clang with three-way comparison operator /// https://github.com/llvm/llvm-project/issues/55919 -#ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" /// Types of data part format. class MergeTreeDataPartType @@ -86,9 +84,7 @@ private: Value value; }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop struct MergeTreeDataPartFormat { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index da90dbb4076..33406168974 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -68,14 +68,10 @@ #include -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop namespace fs = std::filesystem; @@ -727,7 +723,7 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( auto context = getContext(); auto read_buffer_creator = [this, read_settings, object_size] - (const std::string & path, size_t read_until_position) -> std::unique_ptr + (bool restricted_seek, const std::string & path) -> std::unique_ptr { return std::make_unique( client, @@ -738,21 +734,25 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( read_settings, /* use_external_buffer */true, /* offset */0, - read_until_position, - /* restricted_seek */true, + /* read_until_position */0, + restricted_seek, object_size); }; + auto modified_settings{read_settings}; + /// User's S3 object may change, don't cache it. 
+ modified_settings.use_page_cache_for_disks_without_file_cache = false; + + /// FIXME: Changing this setting to default value breaks something around parquet reading + modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; + auto s3_impl = std::make_unique( std::move(read_buffer_creator), StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, + "", read_settings, /* cache_log */nullptr, /* use_external_buffer */true); - auto modified_settings{read_settings}; - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); auto async_reader = std::make_unique( std::move(s3_impl), pool_reader, modified_settings, diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 796ca6bca22..e819e134706 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -3,5 +3,4 @@ test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed -test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster test_select_access_rights/test_main.py::test_alias_columns diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 0672d3085fe..dc6284d20c5 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -13,11 +13,9 @@ 01952_optimize_distributed_group_by_sharding_key 02174_cte_scalar_cache_mv 02354_annoy -02493_inconsistent_hex_and_binary_number # Check after constants refactoring 02901_parallel_replicas_rollup # Flaky. 
Please don't delete them without fixing them: 01287_max_execution_speed 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET 02404_memory_bound_merging -02479_race_condition_between_insert_and_droppin_mv diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 6ba0987010a..b72bce4a677 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -29,34 +29,45 @@ if [ -e "$PACKAGE.zip" ] && [ -z "$FORCE" ]; then [ -n "$REBUILD" ] || exit 0 fi +docker_cmd=( + docker run -i --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash + --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" +) rm -rf "$PACKAGE" "$PACKAGE".zip mkdir "$PACKAGE" cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" - docker run --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ - --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ - -exc " - '$PY_EXEC' -m venv '$VENV' && - source '$VENV/bin/activate' && - pip install -r requirements.txt && - # To have consistent pyc files - find '$VENV/lib' -name '*.pyc' -delete - find '$VENV/lib' ! -type d -exec touch -t 201212121212 {} + - python -m compileall - " - cp -rT "$VENV/lib/$PY_EXEC/site-packages/" "$PACKAGE" - rm -r "$PACKAGE"/{pip,pip-*,setuptools,setuptools-*} - # zip stores metadata about timestamps - find "$PACKAGE" ! -type d -exec touch -t 201212121212 {} + + "${docker_cmd[@]}" -ex <& cl server_pid=$! trap cleanup EXIT +# Shellcheck wrongly process "trap" https://www.shellcheck.net/wiki/SC2317 +# shellcheck disable=SC2317 function cleanup() { - kill -9 $server_pid - kill -9 $client_pid + kill -9 "$server_pid" + kill -9 "$client_pid" echo "Test failed. 
Server log:" cat clickhouse-server.log diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect index 2d595b0f492..1cc11f9bf9f 100755 --- a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -18,23 +18,23 @@ spawn bash send "source $basedir/../shell_config.sh\r" send "\$CLICKHOUSE_CLIENT --query 'select 0b'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0b ;'\r" -expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "\$CLICKHOUSE_CLIENT --query 'select 0x ;'\r" -expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. 
(UNKNOWN_IDENTIFIER)" +expect "(UNKNOWN_IDENTIFIER)" send "exit\r" expect eof diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference new file mode 100644 index 00000000000..5502059508a --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.reference @@ -0,0 +1,23 @@ +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkMisses 1 +ReadBufferFromS3Bytes 1 +54975576145920 +PageCacheBytesUnpinnedRoundedToHugePages 1 +PageCacheBytesUnpinnedRoundedToPages 1 +PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql new file mode 100644 index 00000000000..8765b30ebc3 --- /dev/null +++ b/tests/queries/0_stateless/02867_page_cache.sql @@ -0,0 +1,105 @@ +-- Tags: no-fasttest, no-parallel +-- no-fasttest because we need an S3 storage policy +-- no-parallel because we look at server-wide counters about page cache usage + +set use_page_cache_for_disks_without_file_cache = 1; +set page_cache_inject_eviction = 0; +set enable_filesystem_cache = 0; +set use_uncompressed_cache = 0; + +create table events_snapshot engine Memory as select * from system.events; +create view events_diff as + -- round all stats to 70 MiB to leave a lot of leeway for overhead + with if(event like '%Bytes%', 70*1024*1024, 35) as granularity, + -- cache hits counter can vary a lot depending on other settings: + -- e.g. 
if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk + -- so we just check that the value is not too low + if(event in ( + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages', + 'PageCacheChunkDataHits'), 1, 1000) as clamp + select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff + from system.events new + left outer join events_snapshot old + on old.event = new.event + where diff != 0 and + event in ( + 'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses', + 'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits', + 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages') + order by event; + +drop table if exists page_cache_03055; +create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache'; + +-- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files. +system stop merges page_cache_03055; +insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +system start merges page_cache_03055; +optimize table page_cache_03055 final; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Cold read, should miss cache. (Populating cache on write is not implemented yet.) + +select sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should hit cache. + +select sum(k) from page_cache_03055; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Drop cache and read again, should miss. 
Also don't write to cache. + +system drop page cache; + +select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +-- Data could be read multiple times because we're not writing to cache. +select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Repeat read, should still miss, but populate cache. + +select sum(k) from page_cache_03055; + +select * from events_diff where event not in ('PageCacheChunkDataHits'); +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + +-- Read again, hit the cache. + +select sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; + +select * from events_diff; +truncate table events_snapshot; +insert into events_snapshot select * from system.events; + + +-- Known limitation: cache is not invalidated if a table is dropped and created again at the same path. 
+-- set allow_deprecated_database_ordinary=1; +-- create database test_03055 engine = Ordinary; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (1); +-- select * from test_03055.t; +-- drop table test_03055.t; +-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; +-- insert into test_03055.t values (2); +-- select * from test_03055.t; + + +drop table events_snapshot; +drop table page_cache_03055; +drop view events_diff; diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference index 449fe3d34e3..bbb5a960463 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference @@ -1,3 +1,6 @@ 990000 990000 10 +990000 +1 +1000000 diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql index 51ce18784da..c9ab83ff9ad 100644 --- a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; +DROP TABLE IF EXISTS numbers_1e6; CREATE TABLE pr_1 (`a` UInt32) ENGINE = MergeTree ORDER BY a PARTITION BY a % 10 AS SELECT 10 * intDiv(number, 10) + 1 FROM numbers(1_000_000); @@ -28,5 +29,53 @@ SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_fr SELECT count() FROM pr_2 JOIN numbers(10) as pr_1 ON pr_2.a = pr_1.number SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; +-- Parallel replicas detection should work inside subqueries +SELECT * +FROM +( + WITH filtered_groups AS 
(SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +-- Subquery + subquery +SELECT count() +FROM +( + SELECT c + 1 + FROM + ( + WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) + SELECT count() as c FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a + ) +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +CREATE TABLE numbers_1e6 +( + `n` UInt64 +) +ENGINE = MergeTree +ORDER BY n +AS SELECT * FROM numbers(1_000_000); + +-- Same but nested CTE's +WITH + cte1 AS + ( + SELECT n + FROM numbers_1e6 + ), + cte2 AS + ( + SELECT n + FROM numbers_1e6 + WHERE n IN (cte1) + ) +SELECT count() +FROM cte2 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3; + +DROP TABLE IF EXISTS numbers_1e6; DROP TABLE IF EXISTS pr_1; DROP TABLE IF EXISTS pr_2; diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference new file mode 100644 index 00000000000..93d120dac01 --- /dev/null +++ b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.reference @@ -0,0 +1,2 @@ +Too large +Wrong data diff --git a/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh new file mode 100755 index 00000000000..93290f62c58 --- /dev/null +++ 
b/tests/queries/0_stateless/03003_codec_multiple_buffer_overflow.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo -ne 'checksumchecksum\x91\xa4\x0a\x00\x00\x41\x00\x00\x20\x41\x41\x41\x40\x41\x00\x41\x41\x41\x41\x40\x41\x00\x00\x00\x00\x00\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\xfe\x7f\x00\x00\x41\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x00\x00\x00\x41\x92\x6b\x00\x41\x41\x0b\x00\x00\x82\x82\x82\x82\x63\x82\xff\xff\xff\xff\xff\xff\xff\xff\x95\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x41\x41\x41\x41\x41\x41\x41\x40\x08\x08\x08\x08\x08\x08\x00\x06\x00\x00\x00\x08\x00\x20\x00\x00\xef\xff\xff\xff\xe1\x40\x26\x41\x00\x1d\x01\x00\x00\x41\x42\x0b\xff\xff\xff\xe4\x41\x41\x4e\x41\x41\x06\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x7e\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x00\x04\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\
x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\
x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\xa9\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x4f\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x5c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\
x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x6c\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x9b\x8f\x8f\x8f\x20\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Too large' + +echo -ne 
'checksumchecksum\x91\x2b\x01\x00\x00\xbe\xe1\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x00\x04\x04\x04\x00\x08\x7f\x00\x01\x06\x82\x82\x82\x82\x82\x82\x82\x80\x41\x41\x41\x41\x41\x9a\x75\x6d\x63\x68\x65\x63\x6b\x73\x6d\x63\x68\x65\x63\x6b\x73\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x11\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x41\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x41\x41\x00\x00\x00\x00\x00\x01\x14\xff\x7f\x00\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x41\x61\x41\x0c\x00\x20\x41\x41\xbe\x22\x41\x41\x41\x00\x41\x14\x14\x41\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x14\x0f\x0f\x0f\x0f\x0f\x41\x41\x41\x41\x64\x00\x30\x00\xcf\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x41\x0b\x00\x00\x00\x41\x41\x41\xe8\x1f\xe1\x00\x01\x00\xff\x00\x41\x41\xbf\x41\x41\x40\x40\xe1\x00\x00\x00\x00\x1a\x00\x20\x00\x00\x00\x41\x00\x00\x00\x42\x64\x41\x41\x41\x0c\x00\x1c\x41\x41\xbe\x22\x41\x41\x00\x00\x00\x00\x00\x00\x41\x41\x75\x00\x00\x00\x41\x41\x41\x42\x64\x00\x00\x0b\x00\xe6\xff\x00\x00\x00\x00\x00' | + ${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- | grep -o -F 'Wrong data' diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.reference b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference new file mode 100644 index 00000000000..acf5fe0d423 --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.reference @@ -0,0 +1 @@ +['Hello','Goodbye','test'] diff --git a/tests/queries/0_stateless/03003_enum_and_string_compatible.sql b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql new file mode 100644 index 00000000000..0abba6741ac --- /dev/null +++ b/tests/queries/0_stateless/03003_enum_and_string_compatible.sql @@ -0,0 +1 
@@ +WITH 'Hello'::Enum8('Hello', 'World') AS enum1, 'test'::Enum8('test', 'best') AS enum2 SELECT [enum1, 'Goodbye', enum2]; diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.reference b/tests/queries/0_stateless/03003_sql_json_nonsense.reference new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.reference @@ -0,0 +1 @@ + diff --git a/tests/queries/0_stateless/03003_sql_json_nonsense.sql b/tests/queries/0_stateless/03003_sql_json_nonsense.sql new file mode 100644 index 00000000000..9b7beb42cf3 --- /dev/null +++ b/tests/queries/0_stateless/03003_sql_json_nonsense.sql @@ -0,0 +1 @@ +SELECT JSON_QUERY('{"x":1}', '$[\'hello\']', materialize(toLowCardinality('x'))); diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh index bdb0f681c31..5930e537703 100755 --- a/utils/check-style/shellcheck-run.sh +++ b/utils/check-style/shellcheck-run.sh @@ -2,13 +2,13 @@ ROOT_PATH=$(git rev-parse --show-toplevel) NPROC=$(($(nproc) + 3)) # Check sh tests with Shellcheck -( cd "$ROOT_PATH/tests/queries/0_stateless/" && \ - find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ - xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 -) +find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ + xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --source-path=SCRIPTDIR \ + --severity info --exclude SC1071,SC2086,SC2016 # Check docker scripts with shellcheck -find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | \ - awk -F' ' '$2==" text/x-shellscript" {print $1}' | \ +# Do not check sourced files, since it causes broken --source-path=SCRIPTDIR +find "$ROOT_PATH/docker" -type f -exec file -F' ' --mime-type {} + | \ + awk '$2=="text/x-shellscript" {print $1}' | \ grep -v "compare.sh" | \ - xargs -P "$NPROC" -n 
20 shellcheck + xargs -P "$NPROC" -n 20 shellcheck --external-sources --source-path=SCRIPTDIR diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 23fc0032056..572ceddf590 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.2.1.2248-stable 2024-02-29 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09