diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7901008a7db..6dd28215d78 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1254,6 +1254,228 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug0: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear 
repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Debug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan0: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + 
KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestS3Tsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + 
TEMP_PATH=${{runner.temp}}/stateless_s3_storage_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAarch64: needs: [BuilderDebAarch64] runs-on: [self-hosted, func-tester-aarch64] @@ -3388,6 +3610,12 @@ jobs: - FunctionalStatefulTestMsan - FunctionalStatefulTestUBsan - FunctionalStatelessTestReleaseS3 + - FunctionalStatelessTestS3Debug0 + - FunctionalStatelessTestS3Debug1 + - FunctionalStatelessTestS3Debug2 + - FunctionalStatelessTestS3Tsan0 + - FunctionalStatelessTestS3Tsan1 + - FunctionalStatelessTestS3Tsan2 - StressTestDebug - StressTestAsan - StressTestTsan diff --git a/base/glibc-compatibility/memcpy/memcpy.h b/base/glibc-compatibility/memcpy/memcpy.h index ff27c970bac..0930dfb5c67 100644 --- a/base/glibc-compatibility/memcpy/memcpy.h +++ b/base/glibc-compatibility/memcpy/memcpy.h @@ -214,4 +214,3 @@ tail: return ret; } - diff --git a/contrib/NuRaft b/contrib/NuRaft index bdba298189e..1be805e7cb2 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit bdba298189e29995892de78dcecf64d127444e81 +Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp 
index 7d73d7610db..7abd49bb2e7 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit 7d73d7610db31d4e1ecde0fb3a7ee90ef371207f +Subproject commit 7abd49bb2e72bf9a5029993d31dcb1872da88292 diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index a501c4df64f..6f42a479588 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -54,9 +54,8 @@ set(SRCS add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") -target_include_directories(cxx SYSTEM BEFORE PUBLIC - $ - $/src) +target_include_directories(cxx SYSTEM BEFORE PRIVATE $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index ba0a27c9801..b9b0c5c2c6c 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -83,5 +83,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ --yes --no-install-recommends \ && apt-get clean +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer + COPY build.sh / CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index b9e8b89cd92..f4102a6ccaf 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -31,9 +31,6 @@ ARG deb_location_url="" # set non-empty single_binary_location_url to create docker image # from a single binary url (useful for non-standard builds - with sanitizers, for arm64). -# for example (run on aarch64 server): -# docker build . 
--network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm -# note: clickhouse-odbc-bridge is not supported there. ARG single_binary_location_url="" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index d4da5f0f38c..16372230d91 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -37,7 +37,6 @@ if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")"; FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)" # There could be many disks declared in config -readarray -t FILESYSTEM_CACHE_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.data_cache_path' || true) readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.path' || true) CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" @@ -51,7 +50,6 @@ for dir in "$DATA_DIR" \ "$TMP_DIR" \ "$USER_PATH" \ "$FORMAT_SCHEMA_PATH" \ - "${FILESYSTEM_CACHE_PATHS[@]}" \ "${DISKS_PATHS[@]}" do # check if variable not empty diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index f8ecdf1aa21..43dbe08d765 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -3,6 +3,9 @@ # shellcheck disable=SC2086 # shellcheck disable=SC2024 +# Avoid overlaps with previous runs +dmesg --clear + set -x # Thread Fuzzer allows to check more permutations of possible thread scheduling @@ -38,8 +41,10 @@ function install_packages() function configure() { + export ZOOKEEPER_FAULT_INJECTION=1 # install test configs export USE_DATABASE_ORDINARY=1 + export EXPORT_S3_STORAGE_POLICIES=1 /usr/share/clickhouse-test/config/install.sh # we mount tests folder from repo to /usr/share @@ -183,11 
+188,11 @@ install_packages package_folder configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -./setup_minio.sh stateful # to have a proper environment +./setup_minio.sh stateless # to have a proper environment start -# shellcheck disable=SC2086 # No quotes because I want to split it into words. +# shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" @@ -200,12 +205,36 @@ start clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" -clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" -clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" -clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, 
SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" -clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" + +clickhouse-client --query 
"CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, 
LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight 
UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" 
+clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, 
ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, 
StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" +clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" +clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + +clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" +clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" + clickhouse-client --query "SHOW TABLES FROM test" +clickhouse-client --query "SYSTEM STOP THREAD FUZZER" + +stop + +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +configure + +# But we still need default disk because some tables loaded only into it +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|s3|s3default|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp +mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + +start + ./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \ && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv @@ -255,6 +284,14 @@ zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-serve # Remove file logical_errors.txt if it's empty [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt +# No such key errors +zgrep -Ea "Code: 499.*The specified key 
does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ + && echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv + +# Remove file no_such_key_errors.txt if it's empty +[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt + # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 64cca4beb3a..7f3f38bd8f5 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -168,7 +168,7 @@ def prepare_for_hung_check(drop_databases): for db in databases: if db == "system": continue - command = make_query_command(f"DROP DATABASE {db}") + command = make_query_command(f'DETACH DATABASE {db}') # we don't wait for drop Popen(command, shell=True) break diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 0ec3f09ab7f..683124feaa0 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -17,7 +17,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ && rm -rf /root/.cache/pip diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 97e4e4ddde1..12f74feb272 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -37,7 +37,7 @@ sudo xcode-select --install ``` bash brew update -brew install cmake ninja libtool gettext llvm 
gcc binutils grep findutils +brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils ``` ## Checkout ClickHouse Sources {#checkout-clickhouse-sources} diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 554345a3c15..f0ef1e981fe 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -12,7 +12,7 @@ One ClickHouse server can have multiple replicated databases running and updatin ## Creating a Database {#creating-a-database} ``` sql - CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] +CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] ``` **Engine Parameters** @@ -21,9 +21,7 @@ One ClickHouse server can have multiple replicated databases running and updatin - `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. - `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. -:::warning For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. 
-::: ## Specifics and Recommendations {#specifics-and-recommendations} diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 0eb3331f471..73dea4b0085 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -16,12 +16,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... -) ENGINE = EmbeddedRocksDB([ttl]) PRIMARY KEY(primary_key_name) +) ENGINE = EmbeddedRocksDB([ttl, rocksdb_dir, read_only]) PRIMARY KEY(primary_key_name) ``` Engine parameters: - `ttl` - time to live for values. TTL is accepted in seconds. If TTL is 0, regular RocksDB instance is used (without TTL). +- `rocksdb_dir` - path to the directory of an existing RocksDB or the destination path of the created RocksDB. The table is opened with the specified `rocksdb_dir`. +- `read_only` - when `read_only` is set to true, read-only mode is used. For storage with TTL, compaction will not be triggered (neither manual nor automatic), so no expired entries are removed. - `primary_key_name` – any column name in the column list. - `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`. - columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order. 
diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index d03d6dc9d13..e42429a1b10 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -15,7 +15,7 @@ Usage examples: ## Usage in ClickHouse Server {#usage-in-clickhouse-server} ``` sql -ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) +ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length]) ``` The `max_array_length` and `max_string_length` parameters specify maximum length of all diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 433300eefa4..6e4c8c4b94e 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -94,6 +94,21 @@ It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to onl - If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value. - If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value. - If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time. + - Per default, this `WHERE`-condition is checked at the highest level of the SQL-Query. Alternatively, the condition can be checked in any other `WHERE`-clause within the query using the `{condition}`-keyword. Example: + ```sql + ... + SOURCE(CLICKHOUSE(... 
+ update_field 'added_time' + QUERY ' + SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time + FROM ( + SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time + FROM dictionary_source + WHERE {condition} + )' + )) + ... + ``` If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data. diff --git a/docs/en/sql-reference/functions/uniqtheta-functions.md b/docs/en/sql-reference/functions/uniqtheta-functions.md new file mode 100644 index 00000000000..b2d3712abfc --- /dev/null +++ b/docs/en/sql-reference/functions/uniqtheta-functions.md @@ -0,0 +1,94 @@ +--- +slug: /en/sql-reference/functions/uniqtheta-functions +--- + +# uniqTheta Functions + +uniqTheta functions work on two uniqThetaSketch objects to perform set operations such as ∪ / ∩ / × (union/intersect/not), and return a new uniqThetaSketch object containing the result. + +A uniqThetaSketch object is constructed by the aggregate function uniqTheta with the -State combinator. + +UniqThetaSketch is a data structure that stores an approximate set of values. +For more information on Theta Sketch, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html). + +## uniqThetaUnion + +Computes the union (set operation ∪) of two uniqThetaSketch objects; the result is a new uniqThetaSketch. + +``` sql +uniqThetaUnion(uniqThetaSketch,uniqThetaSketch) +``` + +**Arguments** + +- `uniqThetaSketch` – uniqThetaSketch object. 
+ +**Example** + +``` sql +select finalizeAggregation(uniqThetaUnion(a, b)) as a_union_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality +from +(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b ); +``` + +``` text +┌─a_union_b─┬─a_cardinality─┬─b_cardinality─┐ +│ 4 │ 2 │ 3 │ +└───────────┴───────────────┴───────────────┘ +``` + +## uniqThetaIntersect + +Two uniqThetaSketch objects to do intersect calculation(set operation ∩), the result is a new uniqThetaSketch. + +``` sql +uniqThetaIntersect(uniqThetaSketch,uniqThetaSketch) +``` + +**Arguments** + +- `uniqThetaSketch` – uniqThetaSketch object. + +**Example** + +``` sql +select finalizeAggregation(uniqThetaIntersect(a, b)) as a_intersect_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality +from +(select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b ); +``` + +``` text +┌─a_intersect_b─┬─a_cardinality─┬─b_cardinality─┐ +│ 1 │ 2 │ 3 │ +└───────────────┴───────────────┴───────────────┘ +``` + +## uniqThetaNot + +Two uniqThetaSketch objects to do a_not_b calculation(set operation ×), the result is a new uniqThetaSketch. + +``` sql +uniqThetaNot(uniqThetaSketch,uniqThetaSketch) +``` + +**Arguments** + +- `uniqThetaSketch` – uniqThetaSketch object. 
+ +**Example** + +``` sql +select finalizeAggregation(uniqThetaNot(a, b)) as a_not_b, finalizeAggregation(a) as a_cardinality, finalizeAggregation(b) as b_cardinality +from +(select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b ); +``` + +``` text +┌─a_not_b─┬─a_cardinality─┬─b_cardinality─┐ +│ 2 │ 3 │ 2 │ +└─────────┴───────────────┴───────────────┘ +``` + +**See Also** + +- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md index ca9300597c7..e6d5cf66de9 100644 --- a/docs/zh/development/tests.md +++ b/docs/zh/development/tests.md @@ -1,338 +1,297 @@ --- -slug: /zh/development/tests +slug: /en/development/tests +sidebar_position: 70 +sidebar_label: Testing +title: ClickHouse Testing +description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. --- -# ClickHouse 测试 {#clickhouse-testing} -## 功能测试 {#functional-tests} +## Functional Tests -功能测试使用起来最简单方便. 大多数 ClickHouse 特性都可以通过功能测试进行测试, 并且对于可以通过功能测试进行测试的 ClickHouse 代码的每一个更改, 都必须使用这些特性 +Functional tests are the most simple and convenient to use. Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. -每个功能测试都会向正在运行的 ClickHouse 服务器发送一个或多个查询, 并将结果与参考进行比较. +Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -测试位于 `查询` 目录中. 有两个子目录: `无状态` 和 `有状态`. 无状态测试在没有任何预加载测试数据的情况下运行查询 - 它们通常在测试本身内即时创建小型合成数据集. 状态测试需要来自 Yandex.Metrica 的预加载测试数据, 它对公众开放. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. 
Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. -每个测试可以是两种类型之一: `.sql` 和 `.sh`. `.sql` 测试是简单的 SQL 脚本, 它通过管道传输到 `clickhouse-client --multiquery --testmode`. `.sh` 测试是一个自己运行的脚本. SQL 测试通常比 `.sh` 测试更可取. 仅当您必须测试某些无法从纯 SQL 中执行的功能时才应使用 `.sh` 测试, 例如将一些输入数据传送到 `clickhouse-client` 或测试 `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. -### 在本地运行测试 {#functional-test-locally} +### Running a Test Locally {#functional-test-locally} -在本地启动ClickHouse服务器, 监听默认端口(9000). 例如, 要运行测试 `01428_hash_set_nan_key`, 请切换到存储库文件夹并运行以下命令: +Start the ClickHouse server locally, listening on the default port (9000). To +run, for example, the test `01428_hash_set_nan_key`, change to the repository +folder and run the following command: ``` PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key ``` -有关更多选项, 请参阅`tests/clickhouse-test --help`. 您可以简单地运行所有测试或运行由测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`. 还有并行或随机顺序运行测试的选项. +For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order. 
-### 添加新测试 {#adding-new-test} +### Adding a New Test -添加新的测试, 在 `queries/0_stateless` 目录下创建 `.sql` 或 `.sh` 文件, 手动检查, 然后通过以下方式生成`.reference`文件:`clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`. +To add a new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. -测试应仅使用(创建、删除等)`test` 数据库中假定已预先创建的表; 测试也可以使用临时表. +Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. -### 选择测试名称 {#choosing-test-name} +### Choosing the Test Name -测试名称以五位数前缀开头, 后跟描述性名称, 例如 `00422_hash_function_constexpr.sql`. 要选择前缀, 请找到目录中已存在的最大前缀, 并将其加一. 在此期间, 可能会添加一些具有相同数字前缀的其他测试, 但这没关系并且不会导致任何问题, 您以后不必更改它. +The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later. -一些测试的名称中标有 `zookeeper`、`shard` 或 `long` . `zookeeper` 用于使用 ZooKeeper 的测试. `shard` 用于需要服务器监听 `127.0.0.*` 的测试; `distributed` 或 `global` 具有相同的含义. `long` 用于运行时间稍长于一秒的测试. Yo你可以分别使用 `--no-zookeeper`、`--no-shard` 和 `--no-long` 选项禁用这些测试组. 如果需要 ZooKeeper 或分布式查询,请确保为您的测试名称添加适当的前缀. +Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. 
You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries. -### 检查必须发生的错误 {#checking-error-must-occur} +### Checking for an Error that Must Occur -有时您想测试是否因不正确的查询而发生服务器错误. 我们支持在 SQL 测试中对此进行特殊注释, 形式如下: +Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form: ``` select x; -- { serverError 49 } ``` -此测试确保服务器返回关于未知列“x”的错误代码为 49. 如果没有错误, 或者错误不同, 则测试失败. 如果您想确保错误发生在客户端, 请改用 `clientError` 注释. +This test ensures that the server returns an error with code 49 about unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use `clientError` annotation instead. -不要检查错误消息的特定措辞, 它将来可能会发生变化, 并且测试将不必要地中断. 只检查错误代码. 如果现有的错误代码不足以满足您的需求, 请考虑添加一个新的. +Do not check for a particular wording of error message, it may change in the future, and the test will needlessly break. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one. -### 测试分布式查询 {#testing-distributed-query} +### Testing a Distributed Query -如果你想在功能测试中使用分布式查询, 你可以使用 `127.0.0.{1..2}` 的地址, 以便服务器查询自己; 或者您可以在服务器配置文件中使用预定义的测试集群, 例如`test_shard_localhost`. 请记住在测试名称中添加 `shard` 或 `distributed` 字样, 以便它以正确的配置在 CI 中运行, 其中服务器配置为支持分布式查询. +If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries. 
-## 已知错误 {#known-bugs} +## Known Bugs {#known-bugs} -如果我们知道一些可以通过功能测试轻松重现的错误, 我们将准备好的功能测试放在 `tests/queries/bugs` 目录中. 修复错误后, 这些测试将移至 `tests/queries/0_stateless` . +If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `tests/queries/bugs` directory. These tests will be moved to `tests/queries/0_stateless` when bugs are fixed. -## 集成测试 {#integration-tests} +## Integration Tests {#integration-tests} -集成测试允许在集群配置中测试 ClickHouse 以及 ClickHouse 与其他服务器(如 MySQL、Postgres、MongoDB)的交互. 它们可以用来模拟网络分裂、丢包等情况. 这些测试在Docker下运行, 并使用各种软件创建多个容器. +Integration tests allow testing ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software. -有关如何运行这些测试, 请参阅 `tests/integration/README.md` . +See `tests/integration/README.md` on how to run these tests. -注意, ClickHouse与第三方驱动程序的集成没有经过测试. 另外, 我们目前还没有JDBC和ODBC驱动程序的集成测试. +Note that integration of ClickHouse with third-party drivers is not tested. Also, we currently do not have integration tests with our JDBC and ODBC drivers. -## 单元测试 {#unit-tests} +## Unit Tests {#unit-tests} -当您想测试的不是 ClickHouse 整体, 而是单个独立库或类时,单元测试很有用. 您可以使用 `ENABLE_TESTS` CMake 选项启用或禁用测试构建. 单元测试(和其他测试程序)位于代码中的 `tests` 子目录中. 要运行单元测试, 请键入 `ninja test` 。有些测试使用 `gtest` , 但有些程序在测试失败时会返回非零退出码. +Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure. -如果代码已经被功能测试覆盖了, 就没有必要进行单元测试(而且功能测试通常更易于使用). 
+It’s not necessary to have unit tests if the code is already covered by functional tests (and functional tests are usually much simpler to use). -例如, 您可以通过直接调用可执行文件来运行单独的 gtest 检查: +You can run individual gtest checks by calling the executable directly, for example: ```bash $ ./src/unit_tests_dbms --gtest_filter=LocalAddress* ``` -## 性能测试 {#performance-tests} +## Performance Tests {#performance-tests} -性能测试允许测量和比较 ClickHouse 的某些孤立部分在合成查询上的性能. 测试位于 `tests/performance`. 每个测试都由带有测试用例描述的 `.xml` 文件表示. 测试使用 `docker/tests/performance-comparison` 工具运行. 请参阅自述文件以进行调用. +Performance tests make it possible to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with `docker/test/performance-comparison` tool. See the readme file for invocation. -每个测试在循环中运行一个或多个查询(可能带有参数组合). 一些测试可以包含预加载测试数据集的先决条件. +Each test runs one or multiple queries (possibly with combinations of parameters) in a loop. -如果您希望在某些场景中提高ClickHouse的性能,并且如果可以在简单的查询中观察到改进,那么强烈建议编写性能测试。在测试期间使用 `perf top` 或其他perf工具总是有意义的. +If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests. -## 测试工具和脚本 {#test-tools-and-scripts} +## Test Tools and Scripts {#test-tools-and-scripts} - `tests` 目录中的一些程序不是准备好的测试,而是测试工具. 例如, 对于 `Lexer`, 有一个工具 `src/Parsers/tests/lexer` , 它只是对标准输入进行标记化并将着色结果写入标准输出. 您可以将这些类型的工具用作代码示例以及用于探索和手动测试. +Some programs in `tests` directory are not prepared tests, but are test tools. 
For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just does tokenization of stdin and writes colorized result to stdout. You can use these kinds of tools as code examples and for exploration and manual testing. -## 其他测试 {#miscellaneous-tests} +## Miscellaneous Tests {#miscellaneous-tests} -在 `tests/external_models` 中有机器学习模型的测试. 这些测试不会更新, 必须转移到集成测试. +There are tests for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests. -仲裁插入有单独的测试. 该测试在不同的服务器上运行 ClickHouse 集群并模拟各种故障情况:网络分裂、丢包(ClickHouse 节点之间、ClickHouse 和 ZooKeeper 之间、ClickHouse 服务器和客户端之间等)、`kill -9`、`kill -STOP` 和 `kill -CONT` , 比如 [Jepsen](https://aphyr.com/tags/Jepsen). 然后测试检查所有已确认的插入是否已写入并且所有被拒绝的插入均未写入. +There is a separate test for quorum inserts. This test runs ClickHouse cluster on separate servers and emulates various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT`, like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts were written and all rejected inserts were not. -在 ClickHouse 开源之前, Quorum 测试是由单独的团队编写的. 这个团队不再与ClickHouse合作. 测试碰巧是用Java编写的. 由于这些原因, 必须重写仲裁测试并将其转移到集成测试. +Quorum test was written by a separate team before ClickHouse was open-sourced. This team no longer works with ClickHouse. Test was accidentally written in Java. For these reasons, quorum test must be rewritten and moved to integration tests. -## 手动测试 {#manual-testing} +## Manual Testing {#manual-testing} -当您开发一个新特性时, 手动测试它也是合理的. 您可以按照以下步骤进行操作: +When you develop a new feature, it is reasonable to also test it manually. You can do it with the following steps: -构建 ClickHouse. 从终端运行 ClickHouse:将目录更改为 `programs/clickhouse-server` 并使用 `./clickhouse-server` 运行它. 默认情况下, 它将使用当前目录中的配置(`config.xml`、`users.xml` 和`config.d` 和`users.d` 目录中的文件). 
要连接到 ClickHouse 服务器, 请运行 `programs/clickhouse-client/clickhouse-client` . +Build ClickHouse. Run ClickHouse from the terminal: change directory to `programs/clickhouse-server` and run it with `./clickhouse-server`. It will use configuration (`config.xml`, `users.xml` and files within `config.d` and `users.d` directories) from the current directory by default. To connect to ClickHouse server, run `programs/clickhouse-client/clickhouse-client`. -请注意, 所有 clickhouse 工具(服务器、客户端等)都只是指向名为 `clickhouse` 的单个二进制文件的符号链接. 你可以在 `programs/clickhouse` 找到这个二进制文件. 所有工具也可以作为 `clickhouse tool` 而不是 `clickhouse-tool` 调用. +Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`. -或者, 您可以安装 ClickHouse 包: 从 Yandex 存储库稳定发布, 或者您可以在 ClickHouse 源根目录中使用 `./release` 为自己构建包. 然后使用 `sudo service clickhouse-server start` 启动服务器(或停止以停止服务器). 在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志. +Alternatively you can install ClickHouse package: either stable release from ClickHouse repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo clickhouse start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`. 
-当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件: +When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary: ``` bash -$ sudo service clickhouse-server stop +$ sudo clickhouse stop $ sudo cp ./clickhouse /usr/bin/ -$ sudo service clickhouse-server start +$ sudo clickhouse start ``` -您也可以停止系统 clickhouse-server 并使用相同的配置运行您自己的服务器, 但登录到终端: +Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal: ``` bash -$ sudo service clickhouse-server stop +$ sudo clickhouse stop $ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml ``` -使用 gdb 的示例: +Example with gdb: ``` bash $ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml ``` -如果系统 clickhouse-server 已经在运行并且你不想停止它, 你可以在你的 `config.xml` 中更改端口号(或在 `config.d` 目录中的文件中覆盖它们), 提供适当的数据路径, 并运行它. +If the system clickhouse-server is already running and you do not want to stop it, you can change port numbers in your `config.xml` (or override them in a file in `config.d` directory), provide appropriate data path, and run it. -`clickhouse` 二进制文件几乎没有依赖关系, 可以在广泛的 Linux 发行版中使用. 要在服务器上快速而肮脏地测试您的更改, 您可以简单地将新构建的 `clickhouse` 二进制文件 `scp` 到您的服务器, 然后按照上面的示例运行它. +`clickhouse` binary has almost no dependencies and works across wide range of Linux distributions. To quick and dirty test your changes on a server, you can simply `scp` your fresh built `clickhouse` binary to your server and then run it as in examples above. -## 测试环境 {#testing-environment} +## Build Tests {#build-tests} -在发布稳定版之前, 我们将其部署在测试环境中.测试环境是一个集群,处理 [Yandex.Metrica](https://metrica.yandex.com/) 数据的 1/39 部分. 我们与 Yandex.Metrica 团队共享我们的测试环境. ClickHouse无需在现有数据上停机即可升级. 我们首先看到的是, 数据被成功地处理了, 没有滞后于实时, 复制继续工作, Yandex.Metrica 团队没有发现任何问题. 
第一次检查可以通过以下方式进行: +Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well. -``` sql -SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h; -``` +Examples: +- cross-compile for Darwin x86_64 (Mac OS X) +- cross-compile for FreeBSD x86_64 +- cross-compile for Linux AArch64 +- build on Ubuntu with libraries from system packages (discouraged) +- build with shared linking of libraries (discouraged) -在某些情况下, 我们还会部署到 Yandex 中我们朋友团队的测试环境:Market、Cloud 等. 此外, 我们还有一些用于开发目的的硬件服务器. +For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts. -## 负载测试 {#load-testing} +Though we cannot run all tests on all variant of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests. -部署到测试环境后, 我们使用来自生产集群的查询运行负载测试. 这是手动完成的. +We also test that there are no translation units that are too long to compile or require too much RAM. -确保您在生产集群上启用了 `query_log`. +We also test that there are no too large stack frames. 
-收集一天或更长时间的查询日志: +## Testing for Protocol Compatibility {#testing-for-protocol-compatibility} -``` bash -$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv -``` +When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages). -这是一个复杂的例子. `type = 2` 将过滤成功执行的查询. `query LIKE '%ym:%'` 是从 Yandex.Metrica 中选择相关查询. `is_initial_query` 是只选择客户端发起的查询, 而不是 ClickHouse 本身(作为分布式查询处理的一部分). +We also test some cases automatically with integration tests: +- whether data written by old version of ClickHouse can be successfully read by the new version; +- whether distributed queries work in a cluster with different ClickHouse versions. -`scp` 将此日志记录到您的测试集群并按如下方式运行它: +## Help from the Compiler {#help-from-the-compiler} -``` bash -$ clickhouse benchmark --concurrency 16 < queries.tsv -``` +Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. These options are not enabled for third-party libraries, though. -(可能你还想指定一个 `--user`) +Clang has even more useful warnings - you can look for them with `-Weverything` and pick some to enable in the default build. -然后把它留到晚上或周末, 去休息一下. +For production builds, clang is used, but we also test gcc builds. For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows to catch more errors at runtime. -您应该检查 `clickhouse-server` 没有崩溃, 内存占用是有限的, 且性能不会随着时间的推移而降低. 
+## Sanitizers {#sanitizers} -由于查询和环境的高度可变性, 没有记录和比较精确的查询执行时间. +### Address sanitizer +We run functional, integration, stress and unit tests under ASan on per-commit basis. -## 构建测试 {#build-tests} +### Thread sanitizer +We run functional, integration, stress and unit tests under TSan on per-commit basis. -构建测试允许检查在各种可选配置和一些外部系统上的构建是否被破坏. 这些测试也是自动化的. +### Memory sanitizer +We run functional, integration, stress and unit tests under MSan on per-commit basis. -示例: -- Darwin x86_64 (Mac OS X) 交叉编译 -- FreeBSD x86_64 交叉编译 -- Linux AArch64 交叉编译 -- 使用系统包中的库在 Ubuntu 上构建(不鼓励) -- 使用库的共享链接构建(不鼓励) - -例如, 使用系统包构建是不好的做法, 因为我们无法保证系统将拥有哪个确切版本的包. 但这确实是 Debian 维护者所需要的. 出于这个原因, 我们至少必须支持这种构建变体. 另一个例子: 共享链接是一个常见的麻烦来源, 但对于一些爱好者来说是需要的. - -虽然我们无法对所有构建变体运行所有测试, 但我们希望至少检查各种构建变体没有被破坏. 为此, 我们使用构建测试. - -我们还测试了那些太长而无法编译或需要太多RAM的没有翻译单元. - -我们还测试没有太大的堆栈帧. - -## 协议兼容性测试 {#testing-for-protocol-compatibility} - -当我们扩展 ClickHouse 网络协议时, 我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 一起工作, 而新的 clickhouse-client 与旧的 clickhouse-server 一起工作(只需从相应的包中运行二进制文件). - -我们还使用集成测试自动测试一些案例: -- 旧版本ClickHouse写入的数据是否可以被新版本成功读取; -- 在具有不同 ClickHouse 版本的集群中执行分布式查询. - -## 编译器的帮助 {#help-from-the-compiler} - -主要的 ClickHouse 代码(位于 `dbms` 目录中)是用 `-Wall -Wextra -Werror` 和一些额外的启用警告构建的. 虽然没有为第三方库启用这些选项. - -Clang 有更多有用的警告 - 你可以用 `-Weverything` 寻找它们并选择一些东西来默认构建. - -对于生产构建, 使用 clang, 但我们也测试 make gcc 构建. 对于开发, clang 通常使用起来更方便. 您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电池), 但请注意, 由于更好的控制流和过程间分析, 编译器能够使用 `-O3` 生成更多警告. 在调试模式下使用 clang 构建时, 使用调试版本的 `libc++` 允许在运行时捕获更多错误. - -## 地址清理器 {#sanitizers} - -### 地址清理器 -我们在ASan上运行功能测试、集成测试、压力测试和单元测试. - -### 线程清理器 -我们在TSan下运行功能测试、集成测试、压力测试和单元测试. - -### 内存清理器 -我们在MSan上运行功能测试、集成测试、压力测试和单元测试. - -### 未定义的行为清理器 -我们在UBSan下运行功能测试、集成测试、压力测试和单元测试. 某些第三方库的代码未针对 UB 进行清理. +### Undefined behaviour sanitizer +We run functional, integration, stress and unit tests under UBSan on per-commit basis. The code of some third-party libraries is not sanitized for UB. ### Valgrind (Memcheck) -我们曾经在 Valgrind 下通宵运行功能测试, 但不再这样做了. 这需要几个小时. 
目前在`re2`库中有一个已知的误报, 见[这篇文章](https://research.swtch.com/sparse). +We used to run functional tests under Valgrind overnight, but don't do it anymore. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse). -## 模糊测试 {#fuzzing} +## Fuzzing {#fuzzing} -ClickHouse 模糊测试是使用 [libFuzzer](https://llvm.org/docs/LibFuzzer.html) 和随机 SQL 查询实现的. 所有模糊测试都应使用sanitizers(地址和未定义)进行. +ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries. +All the fuzz testing should be performed with sanitizers (Address and Undefined). -LibFuzzer 用于库代码的隔离模糊测试. Fuzzer 作为测试代码的一部分实现, 并具有 `_fuzzer` 名称后缀. -Fuzzer 示例可以在 `src/Parsers/tests/lexer_fuzzer.cpp` 中找到. LibFuzzer 特定的配置、字典和语料库存储在 `tests/fuzz` 中. -我们鼓励您为处理用户输入的每个功能编写模糊测试. +LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “_fuzzer” name postfixes. +Fuzzer example can be found at `src/Parsers/fuzzers/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`. +We encourage you to write fuzz tests for every functionality that handles user input. -默认情况下不构建模糊器. 要构建模糊器, 应设置` -DENABLE_FUZZING=1` 和 `-DENABLE_TESTS=1` 选项. -我们建议在构建模糊器时禁用 Jemalloc. 用于将 ClickHouse fuzzing 集成到 Google OSS-Fuzz 的配置可以在 `docker/fuzz` 中找到. +Fuzzers are not built by default. To build fuzzers both `-DENABLE_FUZZING=1` and `-DENABLE_TESTS=1` options should be set. +We recommend to disable Jemalloc while building fuzzers. Configuration used to integrate ClickHouse fuzzing to +Google OSS-Fuzz can be found at `docker/fuzz`. -我们还使用简单的模糊测试来生成随机SQL查询, 并检查服务器在执行这些查询时是否会死亡. -你可以在 `00746_sql_fuzzy.pl` 中找到它. 这个测试应该连续运行(通宵或更长时间). +We also use simple fuzz test to generate random SQL queries and to check that the server does not die executing them. +You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). 
-我们还使用复杂的基于 AST 的查询模糊器, 它能够找到大量的极端情况. 它在查询 AST 中进行随机排列和替换. 它会记住先前测试中的 AST 节点, 以使用它们对后续测试进行模糊测试, 同时以随机顺序处理它们. 您可以在 [这篇博客文章](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/) 中了解有关此模糊器的更多信息. +We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. You can learn more about this fuzzer in [this blog article](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/). -## 压力测试 {#stress-test} +## Stress test -压力测试是另一种模糊测试. 它使用单个服务器以随机顺序并行运行所有功能测试. 不检查测试结果. +Stress tests are another case of fuzzing. It runs all functional tests in parallel in random order with a single server. Results of the tests are not checked. -经检查: -- 服务器不会崩溃,不会触发调试或清理程序陷阱; -- 没有死锁; -- 数据库结构一致; -- 服务器可以在测试后成功停止并重新启动,没有异常; +It is checked that: +- server does not crash, no debug or sanitizer traps are triggered; +- there are no deadlocks; +- the database structure is consistent; +- server can successfully stop after the test and start again without exceptions. -有五种变体 (Debug, ASan, TSan, MSan, UBSan). +There are five variants (Debug, ASan, TSan, MSan, UBSan). -## 线程模糊器 {#thread-fuzzer} +## Thread Fuzzer -Thread Fuzzer(请不要与 Thread Sanitizer 混淆)是另一种允许随机化线程执行顺序的模糊测试. 它有助于找到更多特殊情况. +Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases. -## 安全审计 {#security-audit} +## Security Audit -Yandex安全团队的人员从安全的角度对ClickHouse的功能做了一些基本的概述. +Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint. -## 静态分析仪 {#static-analyzers} +## Static Analyzers {#static-analyzers} -我们在每次提交的基础上运行 `clang-tidy`. `clang-static-analyzer` 检查也被启用. `clang-tidy` 也用于一些样式检查. +We run `clang-tidy` on per-commit basis. 
`clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks. -我们已经评估了 `clang-tidy`、`Coverity`、`cppcheck`、`PVS-Studio`、`tscancode`、`CodeQL`. 您将在 `tests/instructions/` 目录中找到使用说明. 你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/). +We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. -如果你使用 `CLion` 作为 IDE, 你可以利用一些开箱即用的 `clang-tidy` 检查 +If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box. -我们还使用 `shellcheck` 对shell脚本进行静态分析. +We also use `shellcheck` for static analysis of shell scripts. -## 硬化 {#hardening} +## Hardening {#hardening} -在调试版本中, 我们使用自定义分配器执行用户级分配的 ASLR. +In debug build we are using custom allocator that does ASLR of user-level allocations. -我们还手动保护在分配后预期为只读的内存区域. +We also manually protect memory regions that are expected to be readonly after allocation. -在调试构建中, 我们还需要对libc进行自定义, 以确保不会调用 "有害的" (过时的、不安全的、非线程安全的)函数. +In debug build we also involve a customization of libc that ensures that no "harmful" (obsolete, insecure, not thread-safe) functions are called. -Debug 断言被广泛使用. +Debug assertions are used extensively. -在调试版本中,如果抛出带有 "逻辑错误" 代码(暗示错误)的异常, 则程序会过早终止. 它允许在发布版本中使用异常, 但在调试版本中使其成为断言. +In debug build, if exception with "logical error" code (implies a bug) is being thrown, the program is terminated prematurely. It allows to use exceptions in release build but make it an assertion in debug build. -jemalloc 的调试版本用于调试版本. -libc++ 的调试版本用于调试版本. +Debug version of jemalloc is used for debug builds. +Debug version of libc++ is used for debug builds. -## 运行时完整性检查 +## Runtime Integrity Checks -对存储在磁盘上的数据是校验和. MergeTree 表中的数据同时以三种方式进行校验和*(压缩数据块、未压缩数据块、跨块的总校验和). 客户端和服务器之间或服务器之间通过网络传输的数据也会进行校验和. 复制确保副本上的数据位相同. +Data stored on disk is checksummed. 
Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas. -需要防止硬件故障(存储介质上的位腐烂、服务器上 RAM 中的位翻转、网络控制器 RAM 中的位翻转、网络交换机 RAM 中的位翻转、客户端 RAM 中的位翻转、线路上的位翻转). 请注意,比特位操作很常见, 即使对于 ECC RAM 和 TCP 校验和(如果您每天设法运行数千台处理 PB 数据的服务器, 也可能发生比特位操作. [观看视频(俄语)](https://www.youtube.com/watch?v=ooBAQIe0KlQ). +It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presence of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ). -ClickHouse 提供诊断功能, 可帮助运维工程师找到故障硬件. +ClickHouse provides diagnostics that will help ops engineers to find faulty hardware. -\* 它并不慢. +\* and it is not slow. -## 代码风格 {#code-style} +## Code Style {#code-style} -[此处](style.md)描述了代码样式规则. +Code style rules are described [here](style.md). -要检查一些常见的样式违规,您可以使用 `utils/check-style` 脚本. +To check for some common style violations, you can use `utils/check-style` script. -要强制使用正确的代码样式, 您可以使用 `clang-format`. 文件 `.clang-format` 位于源根目录. 它大多与我们的实际代码风格相对应. 但是不建议将 `clang-format` 应用于现有文件, 因为它会使格式变得更糟. 您可以使用可以在 clang 源代码库中找到的 `clang-format-diff` 工具. +To force proper style of your code, you can use `clang-format`. File `.clang-format` is located at the sources root. It mostly corresponding with our actual code style. But it’s not recommended to apply `clang-format` to existing files because it makes formatting worse. You can use `clang-format-diff` tool that you can find in clang source repository. -或者, 您可以尝试使用 `uncrustify` 工具来重新格式化您的代码. 
配置位于源根目录中的 `uncrustify.cfg` 中. 它比 `clang-format` 测试更少. +Alternatively you can try `uncrustify` tool to reformat your code. Configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`. -`CLion` 有自己的代码格式化程序, 必须根据我们的代码风格进行调整. +`CLion` has its own code formatter that has to be tuned for our code style. -我们还使用 `codespell` 来查找代码中的拼写错误.它也是自动化的. +We also use `codespell` to find typos in code. It is automated as well. -## Metrica B2B 测试 {#metrica-b2b-tests} +## Test Coverage {#test-coverage} -每个 ClickHouse 版本都使用 Yandex Metrica 和 AppMetrica 引擎进行测试. ClickHouse 的测试版和稳定版部署在 VM 上, 并使用 Metrica 引擎的小副本运行, 该引擎处理输入数据的固定样本. 然后将两个 Metrica 引擎实例的结果放在一起比较. - -这些测试由单独的团队自动化. 由于移动部件数量众多, 测试在大多数情况下都因完全不相关的原因而失败, 这些原因很难弄清楚. 这些测试很可能对我们有负面价值. 尽管如此, 这些测试在数百次中被证明是有用的. - -## 测试覆盖率 {#test-coverage} - -我们还跟踪测试覆盖率, 但仅针对功能测试和 clickhouse-server. 它每天进行. +We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis. ## Tests for Tests -有自动检测薄片测试. 它运行所有新测试100次(用于功能测试)或10次(用于集成测试). 如果至少有一次测试失败,它就被认为是脆弱的. +There is automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If at least single time the test failed, it is considered flaky. ## Testflows -[Testflows](https://testflows.com/) 是一个企业级的测试框架. Altinity 使用它进行一些测试, 我们在 CI 中运行这些测试. +[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse. -## Yandex 检查 (only for Yandex employees) +## Test Automation {#test-automation} -这些检查将ClickHouse代码导入到Yandex内部的单一存储库中, 所以ClickHouse代码库可以被Yandex的其他产品(YT和YDB)用作库. 请注意, clickhouse-server本身并不是由内部回购构建的, Yandex应用程序使用的是未经修改的开源构建的. +We run tests with [GitHub Actions](https://github.com/features/actions). -## 测试自动化 {#test-automation} +Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. 
Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you. -我们使用 Yandex 内部 CI 和名为 "Sandbox" 的作业自动化系统运行测试. +We do not use Travis CI due to the limit on time and computational power. +We do not use Jenkins. It was used before and now we are happy we are not using Jenkins. -在每次提交的基础上, 构建作业和测试都在沙箱中运行. 生成的包和测试结果发布在GitHub上, 可以通过直接链接下载. 产物要保存几个月. 当你在GitHub上发送一个pull请求时, 我们会把它标记为 "可以测试" , 我们的CI系统会为你构建ClickHouse包(发布、调试、使用地址清理器等). - -由于时间和计算能力的限制, 我们不使用 Travis CI. -我们不用Jenkins. 以前用过, 现在我们很高兴不用Jenkins了. - -[原始文章](https://clickhouse.com/docs/en/development/tests/) +[Original article](https://clickhouse.com/docs/en/development/tests/) diff --git a/docs/zh/sql-reference/data-types/lowcardinality.md b/docs/zh/sql-reference/data-types/lowcardinality.md index e089a7f9d41..717c3c979a4 100644 --- a/docs/zh/sql-reference/data-types/lowcardinality.md +++ b/docs/zh/sql-reference/data-types/lowcardinality.md @@ -55,6 +55,5 @@ ORDER BY id ## 参考 -- [高效低基数类型](https://www.altinity.com/blog/2019/3/27/low-cardinality). - [使用低基数类型减少ClickHouse的存储成本 – 来自Instana工程师的分享](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). -- [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). \ No newline at end of file +- [字符优化 (俄语视频分享)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [英语分享](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). 
diff --git a/docs/zh/sql-reference/statements/create.md b/docs/zh/sql-reference/statements/create.md index aef21a704b5..af77f4750b5 100644 --- a/docs/zh/sql-reference/statements/create.md +++ b/docs/zh/sql-reference/statements/create.md @@ -121,8 +121,6 @@ ENGINE = ... ``` -如果指定了编解ec,则默认编解码器不适用。 编解码器可以组合在一个流水线中,例如, `CODEC(Delta, ZSTD)`. 要为您的项目选择最佳的编解码器组合,请通过类似于Altinity中描述的基准测试 [新编码提高ClickHouse效率](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) 文章. - !!! warning "警告" 您无法使用外部实用程序解压缩ClickHouse数据库文件,如 `lz4`. 相反,使用特殊的 [ツ环板compressorョツ嘉ッツ偲](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) 实用程序。 diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 6891d2113a5..95bf89b0255 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -1239,7 +1241,6 @@ try if (options.count("help") || !options.count("seed") - || !options.count("structure") || !options.count("input-format") || !options.count("output-format")) { @@ -1259,7 +1260,11 @@ try UInt64 seed = sipHash64(options["seed"].as()); - std::string structure = options["structure"].as(); + std::string structure; + + if (options.count("structure")) + structure = options["structure"].as(); + std::string input_format = options["input-format"].as(); std::string output_format = options["output-format"].as(); @@ -1287,32 +1292,51 @@ try markov_model_params.determinator_sliding_window_size = options["determinator-sliding-window-size"].as(); /// Create the header block - std::vector structure_vals; - boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - - if (structure_vals.size() % 2 != 0) - throw Exception("Odd number of elements in section structure: must be a list of name type pairs", 
ErrorCodes::LOGICAL_ERROR); + SharedContextHolder shared_context = Context::createShared(); + auto context = Context::createGlobal(shared_context.get()); + auto context_const = WithContext(context).getContext(); + context->makeGlobalContext(); Block header; - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) + ColumnsDescription schema_columns; + + if (structure.empty()) + { + ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) + { + auto file = std::make_unique(STDIN_FILENO); + + /// stdin must be seekable + auto res = lseek(file->getFD(), 0, SEEK_SET); + if (-1 == res) + throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + + return file; + }; + + schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const); + } + else + { + schema_columns = parseColumnsListFromString(structure, context_const); + } + + auto schema_columns_info = schema_columns.getOrdinary(); + + for (auto & info : schema_columns_info) { ColumnWithTypeAndName column; - column.name = structure_vals[i]; - column.type = data_type_factory.get(structure_vals[i + 1]); + column.name = info.name; + column.type = info.type; column.column = column.type->createColumn(); header.insert(std::move(column)); } - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - ReadBufferFromFileDescriptor file_in(STDIN_FILENO); WriteBufferFromFileDescriptor file_out(STDOUT_FILENO); - if (load_from_file.empty()) + if (load_from_file.empty() || structure.empty()) { /// stdin must be seekable auto res = lseek(file_in.getFD(), 0, SEEK_SET); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d788270ecf9..93df877ab8e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1036,7 
+1036,7 @@ int Server::main(const std::vector & /*args*/) try { LOG_DEBUG( - log, "Initiailizing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted); + log, "Initializing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted); global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); } catch (...) @@ -1089,7 +1089,7 @@ int Server::main(const std::vector & /*args*/) } } - LOG_DEBUG(log, "Initiailizing interserver credentials."); + LOG_DEBUG(log, "Initializing interserver credentials."); global_context->updateInterserverCredentials(config()); if (config().has("macros")) diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index f46836ad189..cd17719a45a 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace DB @@ -80,6 +82,58 @@ public: u->update(rhs.sk_union->get_result()); } + void intersect(const ThetaSketchData & rhs) + { + datasketches::theta_union * u = getSkUnion(); + + if (sk_update) + { + u->update(*sk_update); + sk_update.reset(nullptr); + } + + datasketches::theta_intersection theta_intersection; + + theta_intersection.update(u->get_result()); + + if (rhs.sk_update) + theta_intersection.update(*rhs.sk_update); + else if (rhs.sk_union) + theta_intersection.update(rhs.sk_union->get_result()); + + sk_union.reset(nullptr); + u = getSkUnion(); + u->update(theta_intersection.get_result()); + } + + void aNotB(const ThetaSketchData & rhs) + { + datasketches::theta_union * u = getSkUnion(); + + if (sk_update) + { + u->update(*sk_update); + sk_update.reset(nullptr); + } + + datasketches::theta_a_not_b a_not_b; + + if (rhs.sk_update) + { + datasketches::compact_theta_sketch result = a_not_b.compute(u->get_result(), *rhs.sk_update); + sk_union.reset(nullptr); + u = getSkUnion(); + 
u->update(result); + } + else if (rhs.sk_union) + { + datasketches::compact_theta_sketch result = a_not_b.compute(u->get_result(), rhs.sk_union->get_result()); + sk_union.reset(nullptr); + u = getSkUnion(); + u->update(result); + } + } + /// You can only call for an empty object. void read(DB::ReadBuffer & in) { diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index 6f37bb45c84..72bf9899618 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -250,7 +250,7 @@ private: } } - SlotCount available(std::unique_lock &) + SlotCount available(std::unique_lock &) const { if (cur_concurrency < max_concurrency) return max_concurrency - cur_concurrency; diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 921c244da21..720df07efb9 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -52,15 +52,10 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) if (current_thread) { Int64 will_be = current_thread->untracked_memory + size; - Int64 limit = current_thread->untracked_memory_limit + current_thread->untracked_memory_limit_increase; - if (will_be > limit) + if (will_be > current_thread->untracked_memory_limit) { - /// Increase limit before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes - /// more. It could be useful to enlarge Exception message in rethrow logic. 
- current_thread->untracked_memory_limit_increase = current_thread->untracked_memory_limit; memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); - current_thread->untracked_memory_limit_increase = 0; current_thread->untracked_memory = 0; } else diff --git a/src/Common/OvercommitTracker.h b/src/Common/OvercommitTracker.h index 64fb6cdc926..598b877ef3c 100644 --- a/src/Common/OvercommitTracker.h +++ b/src/Common/OvercommitTracker.h @@ -61,7 +61,7 @@ enum class QueryCancellationState // Usually it's hard to set some reasonable hard memory limit // (especially, the default value). This class introduces new -// mechanisim for the limiting of memory usage. +// mechanism for the limiting of memory usage. // Soft limit represents guaranteed amount of memory query/user // may use. It's allowed to exceed this limit. But if hard limit // is reached, query with the biggest overcommit ratio @@ -82,7 +82,7 @@ protected: virtual void pickQueryToExcludeImpl() = 0; // This mutex is used to disallow concurrent access - // to picked_tracker and cancelation_state variables. + // to picked_tracker and cancellation_state variables. 
std::mutex overcommit_m; std::condition_variable cv; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 5f6de294c51..519fd95a266 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -286,6 +286,18 @@ The server successfully detected this situation and will download merged part fr M(S3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \ M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \ \ + M(DiskS3ReadMicroseconds, "Time of GET and HEAD requests to DiskS3 storage.") \ + M(DiskS3ReadRequestsCount, "Number of GET and HEAD requests to DiskS3 storage.") \ + M(DiskS3ReadRequestsErrors, "Number of non-throttling errors in GET and HEAD requests to DiskS3 storage.") \ + M(DiskS3ReadRequestsThrottling, "Number of 429 and 503 errors in GET and HEAD requests to DiskS3 storage.") \ + M(DiskS3ReadRequestsRedirects, "Number of redirects in GET and HEAD requests to DiskS3 storage.") \ + \ + M(DiskS3WriteMicroseconds, "Time of POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \ + M(DiskS3WriteRequestsCount, "Number of POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \ + M(DiskS3WriteRequestsErrors, "Number of non-throttling errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \ + M(DiskS3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \ + M(DiskS3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \ + \ M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \ M(ReadBufferFromS3Bytes, "Bytes read from S3.") \ M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \ diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index 9e48c33f70d..cabc6d8ba1e 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ 
-152,4 +152,3 @@ private: /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false. UInt64 nanoseconds(UInt64 prev_time) const { return clock_gettime_ns_adjusted(prev_time, clock_type) & 0x7FFFFFFFFFFFFFFFULL; } }; - diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index e4c28a5a615..b414a9bccf5 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -133,8 +133,6 @@ public: Int64 untracked_memory = 0; /// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters. Int64 untracked_memory_limit = 4 * 1024 * 1024; - /// Increase limit in case of exception. - Int64 untracked_memory_limit_increase = 0; /// Statistics of read and write rows/bytes Progress progress_in; diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index c6aa954688b..a94e367cd70 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -80,7 +80,7 @@ enum class Error : int32_t ZUNIMPLEMENTED = -6, /// Operation is unimplemented ZOPERATIONTIMEOUT = -7, /// Operation timeout ZBADARGUMENTS = -8, /// Invalid arguments - ZINVALIDSTATE = -9, /// Invliad zhandle state + ZINVALIDSTATE = -9, /// Invalid zhandle state /** API errors. * This is never thrown by the server, it shouldn't be used other than @@ -428,6 +428,12 @@ public: Exception(const Error code_, const std::string & path); /// NOLINT Exception(const Exception & exc); + template + Exception(const Error code_, fmt::format_string fmt, Args &&... 
args) + : Exception(fmt::format(fmt, std::forward(args)...), code_) + { + } + const char * name() const noexcept override { return "Coordination::Exception"; } const char * className() const noexcept override { return "Coordination::Exception"; } Exception * clone() const override { return new Exception(*this); } @@ -439,7 +445,7 @@ public: /** Usage scenario: * - create an object and issue commands; * - you provide callbacks for your commands; callbacks are invoked in internal thread and must be cheap: - * for example, just signal a condvar / fulfull a promise. + * for example, just signal a condvar / fulfill a promise. * - you also may provide callbacks for watches; they are also invoked in internal thread and must be cheap. * - whenever you receive exception with ZSESSIONEXPIRED code or method isExpired returns true, * the ZooKeeper instance is no longer usable - you may only destroy it and probably create another. diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 3af5dfcc177..098dc522eeb 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -507,15 +507,15 @@ ResponsePtr TestKeeperSyncRequest::createResponse() const { return std::make_sha ResponsePtr TestKeeperMultiRequest::createResponse() const { return std::make_shared(); } -TestKeeper::TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_) - : root_path(root_path_), operation_timeout(operation_timeout_) +TestKeeper::TestKeeper(const zkutil::ZooKeeperArgs & args_) + : args(args_) { container.emplace("/", Node()); - if (!root_path.empty()) + if (!args.chroot.empty()) { - if (root_path.back() == '/') - root_path.pop_back(); + if (args.chroot.back() == '/') + args.chroot.pop_back(); } processing_thread = ThreadFromGlobalPool([this] { processingThread(); }); @@ -547,7 +547,7 @@ void TestKeeper::processingThread() { RequestInfo info; - UInt64 max_wait = static_cast(operation_timeout.totalMilliseconds()); + UInt64 
max_wait = static_cast(args.operation_timeout_ms); if (requests_queue.tryPop(info, max_wait)) { if (expired) @@ -556,7 +556,7 @@ void TestKeeper::processingThread() ++zxid; - info.request->addRootPath(root_path); + info.request->addRootPath(args.chroot); auto [response, _] = info.request->process(container, zxid); if (info.watch) @@ -580,7 +580,7 @@ void TestKeeper::processingThread() if (response->error == Error::ZOK) info.request->processWatches(watches, list_watches); - response->removeRootPath(root_path); + response->removeRootPath(args.chroot); if (info.callback) info.callback(*response); } @@ -689,7 +689,7 @@ void TestKeeper::pushRequest(RequestInfo && request) if (expired) throw Exception("Session expired", Error::ZSESSIONEXPIRED); - if (!requests_queue.tryPush(std::move(request), operation_timeout.totalMilliseconds())) + if (!requests_queue.tryPush(std::move(request), args.operation_timeout_ms)) throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); } catch (...) 
diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 5fcd00b01b0..aad5131fcb5 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -33,7 +34,7 @@ using TestKeeperRequestPtr = std::shared_ptr; class TestKeeper final : public IKeeper { public: - TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_); + TestKeeper(const zkutil::ZooKeeperArgs & args_); ~TestKeeper() override; bool isExpired() const override { return expired; } @@ -123,10 +124,7 @@ private: Container container; - String root_path; - ACLs default_acls; - - Poco::Timespan operation_timeout; + zkutil::ZooKeeperArgs args; std::mutex push_request_mutex; std::atomic expired{false}; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 96abf3b543a..6fcd3b52f16 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -6,20 +6,18 @@ #include #include +#include #include #include #include #include "Common/ZooKeeper/IKeeper.h" #include #include -#include #include #include -#define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000 - namespace fs = std::filesystem; namespace DB @@ -49,25 +47,19 @@ static void check(Coordination::Error code, const std::string & path) } -void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) -{ - log = &Poco::Logger::get("ZooKeeper"); - hosts = hosts_; - identity = identity_; - session_timeout_ms = session_timeout_ms_; - operation_timeout_ms = operation_timeout_ms_; - chroot = chroot_; - implementation = implementation_; - get_priority_load_balancing = get_priority_load_balancing_; +void ZooKeeper::init(ZooKeeperArgs args_) - if (implementation == "zookeeper") +{ + 
args = std::move(args_); + log = &Poco::Logger::get("ZooKeeper"); + + if (args.implementation == "zookeeper") { - if (hosts.empty()) + if (args.hosts.empty()) throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS); Coordination::ZooKeeper::Nodes nodes; - nodes.reserve(hosts.size()); + nodes.reserve(args.hosts.size()); /// Shuffle the hosts to distribute the load among ZooKeeper nodes. std::vector shuffled_hosts = shuffleHosts(); @@ -108,33 +100,23 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS); } - impl = std::make_unique( - nodes, - chroot, - identity_.empty() ? "" : "digest", - identity_, - Poco::Timespan(0, session_timeout_ms_ * 1000), - Poco::Timespan(0, ZOOKEEPER_CONNECTION_TIMEOUT_MS * 1000), - Poco::Timespan(0, operation_timeout_ms_ * 1000), - zk_log); + impl = std::make_unique(nodes, args, zk_log); - if (chroot.empty()) - LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(hosts, ",")); + if (args.chroot.empty()) + LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else - LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(hosts, ","), chroot); + LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); } - else if (implementation == "testkeeper") + else if (args.implementation == "testkeeper") { - impl = std::make_unique( - chroot, - Poco::Timespan(0, operation_timeout_ms_ * 1000)); + impl = std::make_unique(args); } else { - throw DB::Exception("Unknown implementation of coordination service: " + implementation, DB::ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception("Unknown implementation of coordination service: " + args.implementation, DB::ErrorCodes::NOT_IMPLEMENTED); } - if (!chroot.empty()) + if (!args.chroot.empty()) { /// Here we check that zk root exists. /// This check is clumsy. 
The reason is we do this request under common mutex, and never want to hung here. @@ -144,7 +126,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ /// This should not happen now, when memory tracker is disabled. /// But let's keep it just in case (it is also easy to backport). auto future = asyncExists("/"); - auto res = future.wait_for(std::chrono::milliseconds(operation_timeout_ms)); + auto res = future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)); if (res != std::future_status::ready) throw KeeperException("Cannot check if zookeeper root exists.", Coordination::Error::ZOPERATIONTIMEOUT); @@ -153,18 +135,30 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ throw KeeperException(code, "/"); if (code == Coordination::Error::ZNONODE) - throw KeeperException("ZooKeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); + throw KeeperException("ZooKeeper root doesn't exist. 
You should create root node " + args.chroot + " before start.", Coordination::Error::ZNONODE); } } +ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_) +{ + zk_log = std::move(zk_log_); + init(args_); +} + +ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) + : zk_log(std::move(zk_log_)) +{ + init(ZooKeeperArgs(config, config_name)); +} + std::vector ZooKeeper::shuffleHosts() const { - std::function get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size()); + std::function get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size()); std::vector shuffle_hosts; - for (size_t i = 0; i < hosts.size(); ++i) + for (size_t i = 0; i < args.hosts.size(); ++i) { ShuffleHost shuffle_host; - shuffle_host.host = hosts[i]; + shuffle_host.host = args.hosts[i]; if (get_priority) shuffle_host.priority = get_priority(i); shuffle_host.randomize(); @@ -181,125 +175,16 @@ std::vector ZooKeeper::shuffleHosts() const return shuffle_hosts; } -ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, - int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) -{ - zk_log = std::move(zk_log_); - Strings hosts_strings; - splitInto<','>(hosts_strings, hosts_string); - - init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); -} - -ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, - int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & 
get_priority_load_balancing_) -{ - zk_log = std::move(zk_log_); - init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); -} - -struct ZooKeeperArgs -{ - ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_name, keys); - - session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; - operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; - implementation = "zookeeper"; - for (const auto & key : keys) - { - if (startsWith(key, "node")) - { - hosts.push_back( - (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + - config.getString(config_name + "." + key + ".host") + ":" - + config.getString(config_name + "." + key + ".port", "2181") - ); - } - else if (key == "session_timeout_ms") - { - session_timeout_ms = config.getInt(config_name + "." + key); - } - else if (key == "operation_timeout_ms") - { - operation_timeout_ms = config.getInt(config_name + "." + key); - } - else if (key == "identity") - { - identity = config.getString(config_name + "." + key); - } - else if (key == "root") - { - chroot = config.getString(config_name + "." + key); - } - else if (key == "implementation") - { - implementation = config.getString(config_name + "." + key); - } - else if (key == "zookeeper_load_balancing") - { - String load_balancing_str = config.getString(config_name + "." 
+ key); - /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) - auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); - if (!load_balancing) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); - get_priority_load_balancing.load_balancing = *load_balancing; - } - else - throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); - } - - if (!chroot.empty()) - { - if (chroot.front() != '/') - throw KeeperException(std::string("Root path in config file should start with '/', but got ") + chroot, Coordination::Error::ZBADARGUMENTS); - if (chroot.back() == '/') - chroot.pop_back(); - } - - /// init get_priority_load_balancing - get_priority_load_balancing.hostname_differences.resize(hosts.size()); - const String & local_hostname = getFQDNOrHostName(); - for (size_t i = 0; i < hosts.size(); ++i) - { - const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); - get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); - } - } - - Strings hosts; - std::string identity; - int session_timeout_ms; - int operation_timeout_ms; - std::string chroot; - std::string implementation; - GetPriorityForLoadBalancing get_priority_load_balancing; -}; - -ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) - : zk_log(std::move(zk_log_)) -{ - ZooKeeperArgs args(config, config_name); - init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing); -} bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const { - ZooKeeperArgs args(config, config_name); + ZooKeeperArgs new_args(config, config_name); // skip reload testkeeper 
cause it's for test and data in memory - if (args.implementation == implementation && implementation == "testkeeper") + if (new_args.implementation == args.implementation && args.implementation == "testkeeper") return false; - if (args.get_priority_load_balancing != get_priority_load_balancing) - return true; - - return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing); + return args != new_args; } @@ -318,7 +203,7 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings { auto future_result = asyncTryGetChildrenNoThrow(path, watch_callback, list_request_type); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::List), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -385,7 +270,7 @@ Coordination::Error ZooKeeper::createImpl(const std::string & path, const std::s { auto future_result = asyncTryCreateNoThrow(path, data, mode); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Create), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -455,7 +340,7 @@ Coordination::Error ZooKeeper::removeImpl(const std::string & path, int32_t vers auto future_result = asyncTryRemoveNoThrow(path, version); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != 
std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Remove), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -487,7 +372,7 @@ Coordination::Error ZooKeeper::existsImpl(const std::string & path, Coordination { auto future_result = asyncTryExistsNoThrow(path, watch_callback); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Exists), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -521,7 +406,7 @@ Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & r { auto future_result = asyncTryGetNoThrow(path, watch_callback); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Get), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -593,7 +478,7 @@ Coordination::Error ZooKeeper::setImpl(const std::string & path, const std::stri { auto future_result = asyncTrySetNoThrow(path, data, version); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Set), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -645,7 +530,7 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests 
& requests auto future_result = asyncTryMultiNoThrow(requests); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Multi), requests[0]->getPath())); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -679,7 +564,7 @@ Coordination::Error ZooKeeper::syncImpl(const std::string & path, std::string & { auto future_result = asyncTrySyncNoThrow(path); - if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) + if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Sync), path)); return Coordination::Error::ZOPERATIONTIMEOUT; @@ -884,7 +769,7 @@ void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path) if (!tryGet(path, content, nullptr, eph_node_disappeared)) return; - int32_t timeout_ms = 3 * session_timeout_ms; + int32_t timeout_ms = 3 * args.session_timeout_ms; if (!eph_node_disappeared->tryWait(timeout_ms)) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Ephemeral node {} still exists after {}s, probably it's owned by someone else. 
" @@ -894,7 +779,7 @@ void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path) ZooKeeperPtr ZooKeeper::startNewSession() const { - return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing); + return std::make_shared(args, zk_log); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index c9b5dc69499..12aa4471ad7 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -72,24 +72,11 @@ using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing; class ZooKeeper { public: + using Ptr = std::shared_ptr; - /// hosts_string -- comma separated [secure://]host:port list - explicit ZooKeeper(const std::string & hosts_string, const std::string & identity_ = "", - int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, - int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, - const std::string & chroot_ = "", - const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr, - const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); + ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_ = nullptr); - explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", - int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, - int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, - const std::string & chroot_ = "", - const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr, - const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); /** Config of the form: @@ -337,8 +324,7 @@ public: private: friend class EphemeralNodeHolder; - void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t 
session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_); + void init(ZooKeeperArgs args_); /// The following methods don't any throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -358,20 +344,13 @@ private: std::unique_ptr impl; - Strings hosts; - std::string identity; - int32_t session_timeout_ms; - int32_t operation_timeout_ms; - std::string chroot; - std::string implementation; + ZooKeeperArgs args; std::mutex mutex; Poco::Logger * log = nullptr; std::shared_ptr zk_log; - GetPriorityForLoadBalancing get_priority_load_balancing; - AtomicStopwatch session_uptime; }; diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp new file mode 100644 index 00000000000..fe2f6957490 --- /dev/null +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} +} + +namespace zkutil +{ + +ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const String & config_name) +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + hosts.push_back( + (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181")); + } + else if (key == "session_timeout_ms") + { + session_timeout_ms = config.getInt(config_name + "." + key); + } + else if (key == "operation_timeout_ms") + { + operation_timeout_ms = config.getInt(config_name + "." 
+ key); + } + else if (key == "connection_timeout_ms") + { + connection_timeout_ms = config.getInt(config_name + "." + key); + } + else if (key == "send_fault_probability") + { + send_fault_probability = config.getDouble(config_name + "." + key); + } + else if (key == "recv_fault_probability") + { + recv_fault_probability = config.getDouble(config_name + "." + key); + } + else if (key == "identity") + { + identity = config.getString(config_name + "." + key); + if (!identity.empty()) + auth_scheme = "digest"; + } + else if (key == "root") + { + chroot = config.getString(config_name + "." + key); + } + else if (key == "implementation") + { + implementation = config.getString(config_name + "." + key); + } + else if (key == "zookeeper_load_balancing") + { + String load_balancing_str = config.getString(config_name + "." + key); + /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) + auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + if (!load_balancing) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); + get_priority_load_balancing.load_balancing = *load_balancing; + } + else + throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); + } + + if (!chroot.empty()) + { + if (chroot.front() != '/') + throw KeeperException( + Coordination::Error::ZBADARGUMENTS, + "Root path in config file should start with '/', but got {}", chroot); + if (chroot.back() == '/') + chroot.pop_back(); + } + + if (session_timeout_ms < 0 || operation_timeout_ms < 0 || connection_timeout_ms < 0) + throw KeeperException("Timeout cannot be negative", Coordination::Error::ZBADARGUMENTS); + + /// init get_priority_load_balancing + get_priority_load_balancing.hostname_differences.resize(hosts.size()); + const String & local_hostname = getFQDNOrHostName(); + for (size_t i = 0; i < hosts.size(); ++i) + { + const 
String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); + get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); + } +} + +ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string) +{ + splitInto<','>(hosts, hosts_string); +} + +} diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.h b/src/Common/ZooKeeper/ZooKeeperArgs.h new file mode 100644 index 00000000000..b5c7b293506 --- /dev/null +++ b/src/Common/ZooKeeper/ZooKeeperArgs.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include + +namespace Poco::Util +{ + class AbstractConfiguration; +} + +namespace zkutil +{ + +struct ZooKeeperArgs +{ + ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const String & config_name); + + /// hosts_string -- comma separated [secure://]host:port list + ZooKeeperArgs(const String & hosts_string); + ZooKeeperArgs() = default; + bool operator == (const ZooKeeperArgs &) const = default; + + String implementation = "zookeeper"; + Strings hosts; + String auth_scheme; + String identity; + String chroot; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + float send_fault_probability = 0; + float recv_fault_probability = 0; + + DB::GetPriorityForLoadBalancing get_priority_load_balancing; +}; + +} diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 44f8437f12c..4066407dc59 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -56,5 +56,6 @@ static constexpr int32_t DEFAULT_SESSION_TIMEOUT_MS = 30000; static constexpr int32_t DEFAULT_MIN_SESSION_TIMEOUT_MS = 10000; static constexpr int32_t DEFAULT_MAX_SESSION_TIMEOUT_MS = 100000; static constexpr int32_t DEFAULT_OPERATION_TIMEOUT_MS = 10000; +static constexpr int32_t 
DEFAULT_CONNECTION_TIMEOUT_MS = 1000; } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 1d0f1fdb1a2..ece6ce7513a 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -276,15 +276,15 @@ void ZooKeeper::read(T & x) Coordination::read(x, *in); } -static void removeRootPath(String & path, const String & root_path) +static void removeRootPath(String & path, const String & chroot) { - if (root_path.empty()) + if (chroot.empty()) return; - if (path.size() <= root_path.size()) - throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY); + if (path.size() <= chroot.size()) + throw Exception(Error::ZDATAINCONSISTENCY, "Received path is not longer than chroot"); - path = path.substr(root_path.size()); + path = path.substr(chroot.size()); } ZooKeeper::~ZooKeeper() @@ -308,27 +308,20 @@ ZooKeeper::~ZooKeeper() ZooKeeper::ZooKeeper( const Nodes & nodes, - const String & root_path_, - const String & auth_scheme, - const String & auth_data, - Poco::Timespan session_timeout_, - Poco::Timespan connection_timeout, - Poco::Timespan operation_timeout_, + const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_) - : root_path(root_path_), - session_timeout(session_timeout_), - operation_timeout(std::min(operation_timeout_, session_timeout_)) + : args(args_) { log = &Poco::Logger::get("ZooKeeperClient"); std::atomic_store(&zk_log, std::move(zk_log_)); - if (!root_path.empty()) + if (!args.chroot.empty()) { - if (root_path.back() == '/') - root_path.pop_back(); + if (args.chroot.back() == '/') + args.chroot.pop_back(); } - if (auth_scheme.empty()) + if (args.auth_scheme.empty()) { ACL acl; acl.permissions = ACL::All; @@ -345,10 +338,22 @@ ZooKeeper::ZooKeeper( default_acls.emplace_back(std::move(acl)); } - connect(nodes, connection_timeout); - if (!auth_scheme.empty()) - sendAuth(auth_scheme, auth_data); + /// It makes sense (especially, for async requests) 
to inject a fault in two places: + /// pushRequest (before request is sent) and receiveEvent (after request was executed). + if (0 < args.send_fault_probability && args.send_fault_probability <= 1) + { + send_inject_fault.emplace(args.send_fault_probability); + } + if (0 < args.recv_fault_probability && args.recv_fault_probability <= 1) + { + recv_inject_fault.emplace(args.recv_fault_probability); + } + + connect(nodes, args.connection_timeout_ms * 1000); + + if (!args.auth_scheme.empty()) + sendAuth(args.auth_scheme, args.identity); send_thread = ThreadFromGlobalPool([this] { sendThread(); }); receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); @@ -364,7 +369,7 @@ void ZooKeeper::connect( Poco::Timespan connection_timeout) { if (nodes.empty()) - throw Exception("No nodes passed to ZooKeeper constructor", Error::ZBADARGUMENTS); + throw Exception(Error::ZBADARGUMENTS, "No nodes passed to ZooKeeper constructor"); static constexpr size_t num_tries = 3; bool connected = false; @@ -394,8 +399,8 @@ void ZooKeeper::connect( socket.connect(node.address, connection_timeout); socket_address = socket.peerAddress(); - socket.setReceiveTimeout(operation_timeout); - socket.setSendTimeout(operation_timeout); + socket.setReceiveTimeout(args.operation_timeout_ms * 1000); + socket.setSendTimeout(args.operation_timeout_ms * 1000); socket.setNoDelay(true); in.emplace(socket); @@ -453,7 +458,7 @@ void ZooKeeper::connect( } message << fail_reasons.str() << "\n"; - throw Exception(message.str(), Error::ZCONNECTIONLOSS); + throw Exception(Error::ZCONNECTIONLOSS, message.str()); } else { @@ -466,7 +471,7 @@ void ZooKeeper::sendHandshake() { int32_t handshake_length = 44; int64_t last_zxid_seen = 0; - int32_t timeout = session_timeout.totalMilliseconds(); + int32_t timeout = args.session_timeout_ms; int64_t previous_session_id = 0; /// We don't support session restore. So previous session_id is always zero. 
constexpr int32_t passwd_len = 16; std::array passwd {}; @@ -491,7 +496,7 @@ void ZooKeeper::receiveHandshake() read(handshake_length); if (handshake_length != SERVER_HANDSHAKE_LENGTH) - throw Exception("Unexpected handshake length received: " + DB::toString(handshake_length), Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Unexpected handshake length received: {}", handshake_length); read(protocol_version_read); if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION) @@ -500,15 +505,15 @@ void ZooKeeper::receiveHandshake() /// It's better for faster failover than just connection drop. /// Implemented in clickhouse-keeper. if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT) - throw Exception("Keeper server rejected the connection during the handshake. Possibly it's overloaded, doesn't see leader or stale", Error::ZCONNECTIONLOSS); + throw Exception(Error::ZCONNECTIONLOSS, "Keeper server rejected the connection during the handshake. Possibly it's overloaded, doesn't see leader or stale"); else - throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Unexpected protocol version: {}", protocol_version_read); } read(timeout); - if (timeout != session_timeout.totalMilliseconds()) + if (timeout != args.session_timeout_ms) /// Use timeout from server. 
- session_timeout = timeout * Poco::Timespan::MILLISECONDS; + args.session_timeout_ms = timeout; read(session_id); read(passwd); @@ -535,17 +540,15 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data) read(err); if (read_xid != AUTH_XID) - throw Exception("Unexpected event received in reply to auth request: " + DB::toString(read_xid), - Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Unexpected event received in reply to auth request: {}", read_xid); int32_t actual_length = in->count() - count_before_event; if (length != actual_length) - throw Exception("Response length doesn't match. Expected: " + DB::toString(length) + ", actual: " + DB::toString(actual_length), - Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length); if (err != Error::ZOK) - throw Exception("Error received in reply to auth request. Code: " + DB::toString(static_cast(err)) + ". Message: " + String(errorMessage(err)), - Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Error received in reply to auth request. Code: {}. Message: {}", + static_cast(err), errorMessage(err)); } @@ -562,14 +565,14 @@ void ZooKeeper::sendThread() auto prev_bytes_sent = out->count(); auto now = clock::now(); - auto next_heartbeat_time = prev_heartbeat_time + std::chrono::milliseconds(session_timeout.totalMilliseconds() / 3); + auto next_heartbeat_time = prev_heartbeat_time + std::chrono::milliseconds(args.session_timeout_ms / 3); if (next_heartbeat_time > now) { /// Wait for the next request in queue. No more than operation timeout. No more than until next heartbeat time. 
UInt64 max_wait = std::min( static_cast(std::chrono::duration_cast(next_heartbeat_time - now).count()), - static_cast(operation_timeout.totalMilliseconds())); + static_cast(args.operation_timeout_ms)); RequestInfo info; if (requests_queue.tryPop(info, max_wait)) @@ -594,7 +597,7 @@ void ZooKeeper::sendThread() break; } - info.request->addRootPath(root_path); + info.request->addRootPath(args.chroot); info.request->probably_sent = true; info.request->write(*out); @@ -633,13 +636,13 @@ void ZooKeeper::receiveThread() try { - Int64 waited = 0; + Int64 waited_us = 0; while (!requests_queue.isFinished()) { auto prev_bytes_received = in->count(); clock::time_point now = clock::now(); - UInt64 max_wait = operation_timeout.totalMicroseconds(); + UInt64 max_wait_us = args.operation_timeout_ms * 1000; std::optional earliest_operation; { @@ -648,30 +651,32 @@ void ZooKeeper::receiveThread() { /// Operations are ordered by xid (and consequently, by time). earliest_operation = operations.begin()->second; - auto earliest_operation_deadline = earliest_operation->time + std::chrono::microseconds(operation_timeout.totalMicroseconds()); + auto earliest_operation_deadline = earliest_operation->time + std::chrono::microseconds(args.operation_timeout_ms * 1000); if (now > earliest_operation_deadline) - throw Exception("Operation timeout (deadline already expired) for path: " + earliest_operation->request->getPath(), Error::ZOPERATIONTIMEOUT); - max_wait = std::chrono::duration_cast(earliest_operation_deadline - now).count(); + throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (deadline already expired) for path: {}", + earliest_operation->request->getPath()); + max_wait_us = std::chrono::duration_cast(earliest_operation_deadline - now).count(); } } - if (in->poll(max_wait)) + if (in->poll(max_wait_us)) { if (requests_queue.isFinished()) break; receiveEvent(); - waited = 0; + waited_us = 0; } else { if (earliest_operation) { - throw Exception("Operation timeout (no response) 
for request " + toString(earliest_operation->request->getOpNum()) + " for path: " + earliest_operation->request->getPath(), Error::ZOPERATIONTIMEOUT); + throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response) for request {} for path: {}", + earliest_operation->request->getOpNum(), earliest_operation->request->getPath()); } - waited += max_wait; - if (waited >= session_timeout.totalMicroseconds()) - throw Exception("Nothing is received in session timeout", Error::ZOPERATIONTIMEOUT); + waited_us += max_wait_us; + if (waited_us >= args.session_timeout_ms * 1000) + throw Exception(Error::ZOPERATIONTIMEOUT, "Nothing is received in session timeout"); } @@ -703,10 +708,13 @@ void ZooKeeper::receiveEvent() ZooKeeperResponsePtr response; UInt64 elapsed_ms = 0; + if (unlikely(recv_inject_fault) && recv_inject_fault.value()(thread_local_rng)) + throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on recv)"); + if (xid == PING_XID) { if (err != Error::ZOK) - throw Exception("Received error in heartbeat response: " + String(errorMessage(err)), Error::ZRUNTIMEINCONSISTENCY); + throw Exception(Error::ZRUNTIMEINCONSISTENCY, "Received error in heartbeat response: {}", errorMessage(err)); response = std::make_shared(); } @@ -781,7 +789,7 @@ void ZooKeeper::receiveEvent() else { response->readImpl(*in); - response->removeRootPath(root_path); + response->removeRootPath(args.chroot); } /// Instead of setting the watch in sendEvent, set it in receiveEvent because need to check the response. /// The watch shouldn't be set if the node does not exist and it will never exist like sequential ephemeral nodes. 
@@ -801,9 +809,9 @@ void ZooKeeper::receiveEvent() { CurrentMetrics::add(CurrentMetrics::ZooKeeperWatch); - /// The key of wathces should exclude the root_path + /// The key of wathces should exclude the args.chroot String req_path = request_info.request->getPath(); - removeRootPath(req_path, root_path); + removeRootPath(req_path, args.chroot); std::lock_guard lock(watches_mutex); watches[req_path].emplace_back(std::move(request_info.watch)); } @@ -811,7 +819,7 @@ void ZooKeeper::receiveEvent() int32_t actual_length = in->count() - count_before_event; if (length != actual_length) - throw Exception("Response length doesn't match. Expected: " + DB::toString(length) + ", actual: " + DB::toString(actual_length), Error::ZMARSHALLINGERROR); + throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length); logOperationIfNeeded(request_info.request, response, /* finalize= */ false, elapsed_ms); //-V614 } @@ -1035,9 +1043,9 @@ void ZooKeeper::pushRequest(RequestInfo && info) { info.request->xid = next_xid.fetch_add(1); if (info.request->xid == CLOSE_XID) - throw Exception("xid equal to close_xid", Error::ZSESSIONEXPIRED); + throw Exception(Error::ZSESSIONEXPIRED, "xid equal to close_xid"); if (info.request->xid < 0) - throw Exception("XID overflow", Error::ZSESSIONEXPIRED); + throw Exception(Error::ZSESSIONEXPIRED, "XID overflow"); if (auto * multi_request = dynamic_cast(info.request.get())) { @@ -1046,12 +1054,15 @@ void ZooKeeper::pushRequest(RequestInfo && info) } } - if (!requests_queue.tryPush(std::move(info), operation_timeout.totalMilliseconds())) + if (unlikely(send_inject_fault) && send_inject_fault.value()(thread_local_rng)) + throw Exception(Error::ZSESSIONEXPIRED, "Session expired (fault injected on send)"); + + if (!requests_queue.tryPush(std::move(info), args.operation_timeout_ms)) { if (requests_queue.isFinished()) - throw Exception("Session expired", Error::ZSESSIONEXPIRED); + throw 
Exception(Error::ZSESSIONEXPIRED, "Session expired"); - throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); + throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push request to queue within operation timeout"); } } catch (...) @@ -1079,7 +1090,7 @@ void ZooKeeper::initApiVersion() }; get(keeper_api_version_path, std::move(callback), {}); - if (future.wait_for(std::chrono::milliseconds(operation_timeout.totalMilliseconds())) != std::future_status::ready) + if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { LOG_TRACE(log, "Failed to get API version: timeout"); return; @@ -1220,7 +1231,7 @@ void ZooKeeper::list( if (keeper_api_version < Coordination::KeeperApiVersion::WITH_FILTERED_LIST) { if (list_request_type != ListRequestType::ALL) - throw Exception("Filtered list request type cannot be used because it's not supported by the server", Error::ZBADARGUMENTS); + throw Exception(Error::ZBADARGUMENTS, "Filtered list request type cannot be used because it's not supported by the server"); request = std::make_shared(); } @@ -1299,8 +1310,8 @@ void ZooKeeper::close() RequestInfo request_info; request_info.request = std::make_shared(std::move(request)); - if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds())) - throw Exception("Cannot push close request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); + if (!requests_queue.tryPush(std::move(request_info), args.operation_timeout_ms)) + throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push close request to queue within operation timeout"); ProfileEvents::increment(ProfileEvents::ZooKeeperClose); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index e00250c1517..6b70f8bc753 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -7,6 +7,7 @@ #include #include #include +#include #include 
#include @@ -27,6 +28,7 @@ #include #include #include +#include /** ZooKeeper C++ library, a replacement for libzookeeper. @@ -111,12 +113,7 @@ public: */ ZooKeeper( const Nodes & nodes, - const String & root_path, - const String & auth_scheme, - const String & auth_data, - Poco::Timespan session_timeout_, - Poco::Timespan connection_timeout, - Poco::Timespan operation_timeout_, + const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_); ~ZooKeeper() override; @@ -201,11 +198,12 @@ public: void setZooKeeperLog(std::shared_ptr zk_log_); private: - String root_path; ACLs default_acls; - Poco::Timespan session_timeout; - Poco::Timespan operation_timeout; + zkutil::ZooKeeperArgs args; + + std::optional send_inject_fault; + std::optional recv_inject_fault; Poco::Net::StreamSocket socket; /// To avoid excessive getpeername(2) calls. diff --git a/src/Common/ZooKeeper/examples/zkutil_test_async.cpp b/src/Common/ZooKeeper/examples/zkutil_test_async.cpp index 17258c529ff..eafa0e27691 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_async.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_async.cpp @@ -5,7 +5,7 @@ int main(int argc, char ** argv) try { - zkutil::ZooKeeper zookeeper{"localhost:2181"}; + zkutil::ZooKeeper zookeeper{zkutil::ZooKeeperArgs("localhost:2181")}; auto nodes = zookeeper.getChildren("/tmp"); diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands.cpp index 490c834eab9..095a0dde2e7 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_commands.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands.cpp @@ -16,7 +16,7 @@ try return 1; } - ZooKeeper zk(argv[1], "", 5000); + ZooKeeper zk{zkutil::ZooKeeperArgs(argv[1])}; std::cout << "create path" << std::endl; zk.create("/test", "old", zkutil::CreateMode::Persistent); diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp index 
09b94a34b78..021f444386a 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp @@ -40,7 +40,8 @@ try } - ZooKeeper zk(nodes, {}, {}, {}, {5, 0}, {0, 50000}, {0, 50000}, nullptr); + zkutil::ZooKeeperArgs args; + ZooKeeper zk(nodes, args, nullptr); Poco::Event event(true); diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index 269c1a681c5..f2084f34274 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -478,11 +478,7 @@ template <> void inline copyOverlap<32, true>(UInt8 * op, const UInt8 *& match, /// See also https://stackoverflow.com/a/30669632 template -bool NO_INLINE decompressImpl( - const char * const source, - char * const dest, - size_t source_size, - size_t dest_size) +bool NO_INLINE decompressImpl(const char * const source, char * const dest, size_t source_size, size_t dest_size) { const UInt8 * ip = reinterpret_cast(source); UInt8 * op = reinterpret_cast(dest); @@ -515,6 +511,18 @@ bool NO_INLINE decompressImpl( const unsigned token = *ip++; length = token >> 4; + + UInt8 * copy_end; + size_t real_length; + + /// It might be true fairly often for well-compressed columns. + /// ATST it may hurt performance in other cases because this condition is hard to predict (especially if the number of zeros is ~50%). + /// In such cases this `if` will significantly increase number of mispredicted instructions. But seems like it results in a + /// noticeable slowdown only for implementations with `copy_amount` > 8. Probably because they use havier instructions. + if constexpr (copy_amount == 8) + if (length == 0) + goto decompress_match; + if (length == 0x0F) { if (unlikely(ip + 1 >= input_end)) @@ -524,7 +532,7 @@ bool NO_INLINE decompressImpl( /// Copy literals. 
- UInt8 * copy_end = op + length; + copy_end = op + length; /// input: Hello, world /// ^-ip @@ -541,7 +549,7 @@ bool NO_INLINE decompressImpl( return false; // Due to implementation specifics the copy length is always a multiple of copy_amount - size_t real_length = 0; + real_length = 0; static_assert(copy_amount == 8 || copy_amount == 16 || copy_amount == 32); if constexpr (copy_amount == 8) @@ -552,9 +560,9 @@ bool NO_INLINE decompressImpl( real_length = (((length >> 5) + 1) * 32); if (unlikely(ip + real_length >= input_end + ADDITIONAL_BYTES_AT_END_OF_BUFFER)) - return false; + return false; - wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. + wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. if (copy_end == output_end) return true; @@ -562,6 +570,8 @@ bool NO_INLINE decompressImpl( ip += length; op = copy_end; + decompress_match: + if (unlikely(ip + 1 >= input_end)) return false; diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index b5347280497..96ae50bbbcf 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -29,7 +29,7 @@ ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( size_t max_single_download_retries_, bool use_external_buffer_, size_t read_until_position_) - : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) + : ReadBufferFromFileBase(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size, nullptr, 0) , blob_container_client(blob_container_client_) , path(path_) , max_single_read_retries(max_single_read_retries_) diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index 3f7b378dee4..26947af23ec 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -1,6 +1,7 @@ #include "ReadIndirectBufferFromRemoteFS.h" #include +#include namespace DB @@ -13,8 +14,8 @@ namespace ErrorCodes ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( - std::shared_ptr impl_) - : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + std::shared_ptr impl_, const ReadSettings & settings) + : ReadBufferFromFileBase(settings.remote_fs_buffer_size, nullptr, 0) , impl(impl_) { } diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index fcd463a92c8..996e69296a6 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -9,6 +9,7 @@ namespace DB { class ReadBufferFromRemoteFSGather; +struct ReadSettings; /** * Reads data from S3/HDFS/Web using stored paths in metadata. 
@@ -18,7 +19,7 @@ class ReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase { public: - explicit ReadIndirectBufferFromRemoteFS(std::shared_ptr impl_); + explicit ReadIndirectBufferFromRemoteFS(std::shared_ptr impl_, const ReadSettings & settings); off_t seek(off_t offset_, int whence) override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 40f68b86e9d..09e5c3d32dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -112,7 +112,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL } else { - auto buf = std::make_unique(std::move(reader_impl)); + auto buf = std::make_unique(std::move(reader_impl), disk_read_settings); return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); } } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 00ef01645cd..4e9dea7f481 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -253,6 +253,13 @@ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metada transaction->commit(); } +void DiskObjectStorage::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) +{ + auto transaction = createObjectStorageTransaction(); + transaction->removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only); + transaction->commit(); +} + UInt32 DiskObjectStorage::getRefCount(const String & path) const { return metadata_storage->getHardlinkCount(path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 34056f17b3c..14fb84d7a15 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ 
b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -92,6 +92,8 @@ public: void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; + void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; + MetadataStoragePtr getMetadataStorage() override { return metadata_storage; } UInt32 getRefCount(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 6aabf300cb8..5f376de34dc 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -139,6 +138,87 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation } }; +struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperation +{ + RemoveBatchRequest remove_paths; + bool keep_all_batch_data; + NameSet file_names_remove_metadata_only; + StoredObjects objects_to_remove; + bool remove_from_cache = false; + + RemoveManyObjectStorageOperation( + IObjectStorage & object_storage_, + IMetadataStorage & metadata_storage_, + const RemoveBatchRequest & remove_paths_, + bool keep_all_batch_data_, + const NameSet & file_names_remove_metadata_only_) + : IDiskObjectStorageOperation(object_storage_, metadata_storage_) + , remove_paths(remove_paths_) + , keep_all_batch_data(keep_all_batch_data_) + , file_names_remove_metadata_only(file_names_remove_metadata_only_) + {} + + std::string getInfoForLog() const override + { + return fmt::format("RemoveManyObjectStorageOperation (paths size: {}, keep all batch {}, files to keep {})", remove_paths.size(), keep_all_batch_data, fmt::join(file_names_remove_metadata_only, ", ")); + } + + void execute(MetadataTransactionPtr tx) override + { + for (const auto & 
[path, if_exists] : remove_paths) + { + + if (!metadata_storage.exists(path)) + { + if (if_exists) + continue; + + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path); + } + + if (!metadata_storage.isFile(path)) + throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path); + + try + { + uint32_t hardlink_count = metadata_storage.getHardlinkCount(path); + auto objects = metadata_storage.getStorageObjects(path); + + tx->unlinkMetadata(path); + + /// File is really redundant + if (hardlink_count == 0 && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) + objects_to_remove.insert(objects_to_remove.end(), objects.begin(), objects.end()); + } + catch (const Exception & e) + { + /// If it's impossible to read meta - just remove it from FS. + if (e.code() == ErrorCodes::UNKNOWN_FORMAT + || e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF + || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA + || e.code() == ErrorCodes::CANNOT_OPEN_FILE) + { + tx->unlinkFile(path); + } + else + throw; + } + } + } + + void undo() override + { + + } + + void finalize() override + { + if (!objects_to_remove.empty()) + object_storage.removeObjects(objects_to_remove); + } +}; + + struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; @@ -480,14 +560,8 @@ void DiskObjectStorageTransaction::removeFileIfExists(const std::string & path) void DiskObjectStorageTransaction::removeSharedFiles( const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) { - for (const auto & file : files) - { - bool keep_file = keep_all_batch_data || file_names_remove_metadata_only.contains(fs::path(file.path).filename()); - if (file.if_exists) - removeSharedFileIfExists(file.path, keep_file); - else - removeSharedFile(file.path, keep_file); - } + auto operation = std::make_unique(object_storage, metadata_storage, 
files, keep_all_batch_data, file_names_remove_metadata_only); + operations_to_execute.emplace_back(std::move(operation)); } namespace diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 2303401466d..2f82458ecd8 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -70,11 +70,12 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI auto hdfs_path = path.substr(begin_of_path); auto hdfs_uri = path.substr(0, begin_of_path); - return std::make_unique(hdfs_uri, hdfs_path, config, disk_read_settings); + return std::make_unique( + hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; auto hdfs_impl = std::make_unique(std::move(read_buffer_creator), objects, disk_read_settings); - auto buf = std::make_unique(std::move(hdfs_impl)); + auto buf = std::make_unique(std::move(hdfs_impl), read_settings); return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index ccde7d20778..b3fa36ea169 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -31,6 +31,7 @@ #include #include + namespace DB { @@ -90,7 +91,19 @@ void logIfError(const Aws::Utils::Outcome & response, std::functi std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path */) { - return getRandomASCIIString(32); + /// Path to store the new S3 object. + + /// Total length is 32 a-z characters for enough randomness. + /// First 3 characters are used as a prefix for + /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/ + + constexpr size_t key_name_total_size = 32; + constexpr size_t key_name_prefix_size = 3; + + /// Path to store new S3 object. 
+ return fmt::format("{}/{}", + getRandomASCIIString(key_name_prefix_size), + getRandomASCIIString(key_name_total_size - key_name_prefix_size)); } Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const @@ -157,7 +170,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT } else { - auto buf = std::make_unique(std::move(s3_impl)); + auto buf = std::make_unique(std::move(s3_impl), disk_read_settings); return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); } } @@ -245,6 +258,8 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis auto outcome = client_ptr->DeleteObject(request); throwIfUnexpectedError(outcome, if_exists); + + LOG_TRACE(log, "Object with path {} was removed from S3", object.absolute_path); } void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_exists) @@ -288,6 +303,8 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e auto outcome = client_ptr->DeleteObjects(request); throwIfUnexpectedError(outcome, if_exists); + + LOG_TRACE(log, "Objects with paths [{}] were removed from S3", keys); } } } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 448826bfa71..ecbd8cc9aa1 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -180,6 +181,7 @@ private: const String version_id; + Poco::Logger * log = &Poco::Logger::get("S3ObjectStorage"); DataSourceDescription data_source_description; }; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index b34eddf63f0..a93d95d91bd 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -116,7 +116,8 @@ std::unique_ptr getClient(const 
Poco::Util::AbstractConfigura S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects, - context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); + context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, + /* for_disk_s3 = */ true); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/Disks/ObjectStorages/StoredObject.h b/src/Disks/ObjectStorages/StoredObject.h index acb8a5fd127..d9faa766540 100644 --- a/src/Disks/ObjectStorages/StoredObject.h +++ b/src/Disks/ObjectStorages/StoredObject.h @@ -3,6 +3,7 @@ #include #include + namespace DB { diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 3c7ce47340d..b0fed4e001b 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -188,7 +188,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT } else { - auto buf = std::make_unique(std::move(web_impl)); + auto buf = std::make_unique(std::move(web_impl), read_settings); return std::make_unique(std::move(buf), min_bytes_for_seek); } } diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 1bc95b49dbe..0387cc86d48 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -92,6 +92,11 @@ list (APPEND OBJECT_LIBS $) add_subdirectory(array) list (APPEND OBJECT_LIBS $) +if (TARGET ch_contrib::datasketches) + add_subdirectory(UniqTheta) + list (APPEND OBJECT_LIBS $) +endif() + add_subdirectory(JSONPath) list (APPEND PRIVATE_LIBS clickhouse_functions_jsonpath) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 95af8a61aae..83b89b85b62 100644 --- a/src/Functions/IFunction.h +++ 
b/src/Functions/IFunction.h @@ -171,7 +171,7 @@ public: */ virtual bool isSuitableForConstantFolding() const { return true; } - /** If function isSuitableForConstantFolding then, this method will be called during query analyzis + /** If function isSuitableForConstantFolding then, this method will be called during query analysis * if some arguments are constants. For example logical functions (AndFunction, OrFunction) can * return they result based on some constant arguments. * Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForNothing, @@ -394,7 +394,7 @@ private: using FunctionOverloadResolverPtr = std::shared_ptr; /// Old function interface. Check documentation in IFunction.h. -/// If client do not need statefull properties it can implement this interface. +/// If client do not need stateful properties it can implement this interface. class IFunction { public: diff --git a/src/Functions/UniqTheta/CMakeLists.txt b/src/Functions/UniqTheta/CMakeLists.txt new file mode 100644 index 00000000000..27e23eb3881 --- /dev/null +++ b/src/Functions/UniqTheta/CMakeLists.txt @@ -0,0 +1,9 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") + +add_library(clickhouse_functions_uniqtheta FunctionsUniqTheta.cpp) + +target_link_libraries(clickhouse_functions_uniqtheta PRIVATE dbms) + +if (TARGET ch_contrib::datasketches) + target_link_libraries (clickhouse_functions_uniqtheta PRIVATE ch_contrib::datasketches) +endif () diff --git a/src/Functions/UniqTheta/FunctionsUniqTheta.cpp b/src/Functions/UniqTheta/FunctionsUniqTheta.cpp new file mode 100644 index 00000000000..aa280c0818e --- /dev/null +++ b/src/Functions/UniqTheta/FunctionsUniqTheta.cpp @@ -0,0 +1,68 @@ +#include + +#include "FunctionsUniqTheta.h" + +#if USE_DATASKETCHES + +namespace DB +{ + +REGISTER_FUNCTION(UniqTheta) +{ + factory.registerFunction( + { + R"( +Two uniqThetaSketch objects to do intersect calculation(set operation ∩), the result is a new 
uniqThetaSketch. + +A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State. + +UniqThetaSketch is a data structure storage of approximate values set. +For more information on RoaringBitmap, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html). + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "select finalizeAggregation(uniqThetaIntersect(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}}, + Documentation::Categories{"uniqTheta"} + }); + + factory.registerFunction( + { + R"( +Two uniqThetaSketch objects to do union calculation(set operation ∪), the result is a new uniqThetaSketch. + +A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State. + +UniqThetaSketch is a data structure storage of approximate values set. +For more information on RoaringBitmap, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html). + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "select finalizeAggregation(uniqThetaUnion(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}}, + Documentation::Categories{"uniqTheta"} + }); + factory.registerFunction( + { + R"( +Two uniqThetaSketch objects to do a_not_b calculation(set operation ×), the result is a new uniqThetaSketch. + +A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State. + +UniqThetaSketch is a data structure storage of approximate values set. +For more information on RoaringBitmap, see: [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html). 
+ +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "select finalizeAggregation(uniqThetaNot(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}}, + Documentation::Categories{"uniqTheta"} + }); +} + +} + +#endif diff --git a/src/Functions/UniqTheta/FunctionsUniqTheta.h b/src/Functions/UniqTheta/FunctionsUniqTheta.h new file mode 100644 index 00000000000..7cdbf587cf7 --- /dev/null +++ b/src/Functions/UniqTheta/FunctionsUniqTheta.h @@ -0,0 +1,176 @@ +#pragma once + +#include + +#if USE_DATASKETCHES + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + + + namespace ErrorCodes + { + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + } + + struct UniqThetaIntersectImpl + { + static void apply(AggregateFunctionUniqThetaData & sketch_data_1, const AggregateFunctionUniqThetaData & sketch_data_2) + { + sketch_data_1.set.intersect(sketch_data_2.set); + } + }; + + struct UniqThetaUnionImpl + { + static void apply(AggregateFunctionUniqThetaData & sketch_data_1, const AggregateFunctionUniqThetaData & sketch_data_2) + { + sketch_data_1.set.merge(sketch_data_2.set); + } + }; + + struct UniqThetaNotImpl + { + static void apply(AggregateFunctionUniqThetaData & sketch_data_1, const AggregateFunctionUniqThetaData & sketch_data_2) + { + sketch_data_1.set.aNotB(sketch_data_2.set); + } + }; + + template + class FunctionUniqTheta : public IFunction + { + public: + static constexpr auto name = Name::name; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr 
getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * sketch_type0 = typeid_cast(arguments[0].get()); + if (!(sketch_type0 && sketch_type0->getFunctionName() == "uniqTheta")) + throw Exception( + "First argument for function " + getName() + " must be a uniqTheta but it has type " + arguments[0]->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * sketch_type1 = typeid_cast(arguments[1].get()); + if (!(sketch_type1 && sketch_type1->getFunctionName() == "uniqTheta")) + throw Exception( + "Second argument for function " + getName() + " must be a uniqTheta but it has type " + arguments[1]->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const DataTypes & arg_data_types0 = sketch_type0->getArgumentsDataTypes(); + const DataTypes & arg_data_types1 = sketch_type1->getArgumentsDataTypes(); + + if (arg_data_types0.size() != arg_data_types1.size()) + throw Exception( + "The nested type in uniqThetas must be the same length, but one is " + std::to_string(arg_data_types0.size()) + + ", and the other is " + std::to_string(arg_data_types1.size()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + size_t types_size = arg_data_types0.size(); + for (size_t i = 0; i < types_size; ++i) + { + if (!arg_data_types0[i]->equals(*arg_data_types1[i])) + throw Exception( + "The " + std::to_string(i) + "th nested type in uniqThetas must be the same, but one is " + arg_data_types0[i]->getName() + + ", and the other is " + arg_data_types1[i]->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + + return arguments[0]; + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnAggregateFunction * column_ptrs[2]; + bool is_column_const[2]; + for (size_t i = 0; i < 2; ++i) + { + if (const auto * argument_column_const = typeid_cast(arguments[i].column.get())) + { + 
column_ptrs[i] = typeid_cast(argument_column_const->getDataColumnPtr().get()); + is_column_const[i] = true; + } + else + { + column_ptrs[i] = typeid_cast(arguments[i].column.get()); + is_column_const[i] = false; + } + } + + auto col_to = ColumnAggregateFunction::create(column_ptrs[0]->getAggregateFunction()); + + col_to->reserve(input_rows_count); + + const PaddedPODArray & container0 = column_ptrs[0]->getData(); + const PaddedPODArray & container1 = column_ptrs[1]->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; + const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; + + col_to->insertFrom(data_ptr_0); + AggregateFunctionUniqThetaData & sketch_data_1 = *reinterpret_cast(col_to->getData()[i]); + const AggregateFunctionUniqThetaData & sketch_data_2 + = *reinterpret_cast(data_ptr_1); + Impl::apply(sketch_data_1, sketch_data_2); + } + return col_to; + } + }; + + struct NameUniqThetaIntersect + { + static constexpr auto name = "uniqThetaIntersect"; + }; + + struct NameUniqThetaUnion + { + static constexpr auto name = "uniqThetaUnion"; + }; + + struct NameUniqThetaNot + { + static constexpr auto name = "uniqThetaNot"; + }; + + using FunctionUniqThetaIntersect = FunctionUniqTheta; + using FunctionUniqThetaUnion = FunctionUniqTheta; + using FunctionUniqThetaNot = FunctionUniqTheta; + +} + + +#endif diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 7fb432eab22..f7fd06cac08 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -48,7 +48,7 @@ ReadBufferFromS3::ReadBufferFromS3( size_t offset_, size_t read_until_position_, bool restricted_seek_) - : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0) + : ReadBufferFromFileBase(use_external_buffer_ ? 
0 : settings_.remote_fs_buffer_size, nullptr, 0) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) , key(key_) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 34e7a7f6eb4..569bebb1ed1 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -42,6 +42,18 @@ namespace ProfileEvents extern const Event S3WriteRequestsErrors; extern const Event S3WriteRequestsThrottling; extern const Event S3WriteRequestsRedirects; + + extern const Event DiskS3ReadMicroseconds; + extern const Event DiskS3ReadRequestsCount; + extern const Event DiskS3ReadRequestsErrors; + extern const Event DiskS3ReadRequestsThrottling; + extern const Event DiskS3ReadRequestsRedirects; + + extern const Event DiskS3WriteMicroseconds; + extern const Event DiskS3WriteRequestsCount; + extern const Event DiskS3WriteRequestsErrors; + extern const Event DiskS3WriteRequestsThrottling; + extern const Event DiskS3WriteRequestsRedirects; } namespace CurrentMetrics @@ -62,11 +74,13 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, - bool enable_s3_requests_logging_) + bool enable_s3_requests_logging_, + bool for_disk_s3_) : force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) , enable_s3_requests_logging(enable_s3_requests_logging_) + , for_disk_s3(for_disk_s3_) { } @@ -112,6 +126,7 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , remote_host_filter(client_configuration.remote_host_filter) , s3_max_redirects(client_configuration.s3_max_redirects) , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) + , for_disk_s3(client_configuration.for_disk_s3) , extra_headers(client_configuration.extra_headers) { } @@ -176,6 +191,46 @@ namespace } } +PocoHTTPClient::S3MetricKind PocoHTTPClient::getMetricKind(const Aws::Http::HttpRequest & 
request) +{ + switch (request.GetMethod()) + { + case Aws::Http::HttpMethod::HTTP_GET: + case Aws::Http::HttpMethod::HTTP_HEAD: + return S3MetricKind::Read; + case Aws::Http::HttpMethod::HTTP_POST: + case Aws::Http::HttpMethod::HTTP_DELETE: + case Aws::Http::HttpMethod::HTTP_PUT: + case Aws::Http::HttpMethod::HTTP_PATCH: + return S3MetricKind::Write; + } + throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED); +} + +void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount) const +{ + const ProfileEvents::Event events_map[static_cast(S3MetricType::EnumSize)][static_cast(S3MetricKind::EnumSize)] = { + {ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds}, + {ProfileEvents::S3ReadRequestsCount, ProfileEvents::S3WriteRequestsCount}, + {ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors}, + {ProfileEvents::S3ReadRequestsThrottling, ProfileEvents::S3WriteRequestsThrottling}, + {ProfileEvents::S3ReadRequestsRedirects, ProfileEvents::S3WriteRequestsRedirects}, + }; + + const ProfileEvents::Event disk_s3_events_map[static_cast(S3MetricType::EnumSize)][static_cast(S3MetricKind::EnumSize)] = { + {ProfileEvents::DiskS3ReadMicroseconds, ProfileEvents::DiskS3WriteMicroseconds}, + {ProfileEvents::DiskS3ReadRequestsCount, ProfileEvents::DiskS3WriteRequestsCount}, + {ProfileEvents::DiskS3ReadRequestsErrors, ProfileEvents::DiskS3WriteRequestsErrors}, + {ProfileEvents::DiskS3ReadRequestsThrottling, ProfileEvents::DiskS3WriteRequestsThrottling}, + {ProfileEvents::DiskS3ReadRequestsRedirects, ProfileEvents::DiskS3WriteRequestsRedirects}, + }; + + S3MetricKind kind = getMetricKind(request); + + ProfileEvents::increment(events_map[static_cast(type)][static_cast(kind)], amount); + if (for_disk_s3) + ProfileEvents::increment(disk_s3_events_map[static_cast(type)][static_cast(kind)], amount); +} void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, 
@@ -189,45 +244,7 @@ void PocoHTTPClient::makeRequestInternal( if (enable_s3_requests_logging) LOG_TEST(log, "Make request to: {}", uri); - enum class S3MetricType - { - Microseconds, - Count, - Errors, - Throttling, - Redirects, - - EnumSize, - }; - - auto select_metric = [&request](S3MetricType type) - { - const ProfileEvents::Event events_map[][2] = { - {ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds}, - {ProfileEvents::S3ReadRequestsCount, ProfileEvents::S3WriteRequestsCount}, - {ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors}, - {ProfileEvents::S3ReadRequestsThrottling, ProfileEvents::S3WriteRequestsThrottling}, - {ProfileEvents::S3ReadRequestsRedirects, ProfileEvents::S3WriteRequestsRedirects}, - }; - - static_assert((sizeof(events_map) / sizeof(events_map[0])) == static_cast(S3MetricType::EnumSize)); - - switch (request.GetMethod()) - { - case Aws::Http::HttpMethod::HTTP_GET: - case Aws::Http::HttpMethod::HTTP_HEAD: - return events_map[static_cast(type)][0]; // Read - case Aws::Http::HttpMethod::HTTP_POST: - case Aws::Http::HttpMethod::HTTP_DELETE: - case Aws::Http::HttpMethod::HTTP_PUT: - case Aws::Http::HttpMethod::HTTP_PATCH: - return events_map[static_cast(type)][1]; // Write - } - - throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED); - }; - - ProfileEvents::increment(select_metric(S3MetricType::Count)); + addMetric(request, S3MetricType::Count); CurrentMetrics::Increment metric_increment{CurrentMetrics::S3Requests}; try @@ -334,7 +351,7 @@ void PocoHTTPClient::makeRequestInternal( auto & response_body_stream = session->receiveResponse(poco_response); watch.stop(); - ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); + addMetric(request, S3MetricType::Microseconds, watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); @@ -349,7 +366,7 @@ void PocoHTTPClient::makeRequestInternal( if 
(enable_s3_requests_logging) LOG_TEST(log, "Redirecting request to new location: {}", location); - ProfileEvents::increment(select_metric(S3MetricType::Redirects)); + addMetric(request, S3MetricType::Redirects); continue; } @@ -387,7 +404,7 @@ void PocoHTTPClient::makeRequestInternal( LOG_WARNING(log, "Response for request contain tag in body, settings internal server error (500 code)"); response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR); - ProfileEvents::increment(select_metric(S3MetricType::Errors)); + addMetric(request, S3MetricType::Errors); if (error_report) error_report(request_configuration); @@ -401,11 +418,11 @@ void PocoHTTPClient::makeRequestInternal( if (status_code == 429 || status_code == 503) { // API throttling - ProfileEvents::increment(select_metric(S3MetricType::Throttling)); + addMetric(request, S3MetricType::Throttling); } else if (status_code >= 300) { - ProfileEvents::increment(select_metric(S3MetricType::Errors)); + addMetric(request, S3MetricType::Errors); if (status_code >= 500 && error_report) error_report(request_configuration); } @@ -423,7 +440,7 @@ void PocoHTTPClient::makeRequestInternal( response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION); response->SetClientErrorMessage(getCurrentExceptionMessage(false)); - ProfileEvents::increment(select_metric(S3MetricType::Errors)); + addMetric(request, S3MetricType::Errors); } } diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 5fc8c9acc17..9005f132974 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -44,6 +44,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; bool enable_s3_requests_logging; + bool for_disk_s3; HeaderCollection extra_headers; void updateSchemeAndRegion(); @@ -55,7 +56,8 @@ private: const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int 
s3_max_redirects_, - bool enable_s3_requests_logging_ + bool enable_s3_requests_logging_, + bool for_disk_s3_ ); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. @@ -113,18 +115,42 @@ public: Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const override; private: + void makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; + enum class S3MetricType + { + Microseconds, + Count, + Errors, + Throttling, + Redirects, + + EnumSize, + }; + + enum class S3MetricKind + { + Read, + Write, + + EnumSize, + }; + + static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); + void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; + std::function per_request_configuration; std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; bool enable_s3_requests_logging; + bool for_disk_s3; const HeaderCollection extra_headers; }; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 44bdf436fae..21d421bb4f6 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -87,7 +87,8 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) region, remote_host_filter, s3_max_redirects, - enable_s3_requests_logging + enable_s3_requests_logging, + /* for_disk_s3 = */ false ); client_configuration.endpointOverride = uri.endpoint; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 1ff1c609952..ca93b8f0fe1 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -543,7 +543,7 @@ public: /// AWS API tries credentials providers one by one. 
Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be /// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers. { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); AddProvider(std::make_shared(aws_client_configuration)); } @@ -580,7 +580,7 @@ public: } else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true") { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3); /// See MakeDefaultHttpResourceClientConfiguration(). 
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside @@ -700,9 +700,10 @@ namespace S3 const String & force_region, const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, - bool enable_s3_requests_logging) + bool enable_s3_requests_logging, + bool for_disk_s3) { - return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging); + return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging, for_disk_s3); } URI::URI(const Poco::URI & uri_) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 46a09ee8901..e532f70633b 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -45,7 +45,8 @@ public: const String & force_region, const RemoteHostFilter & remote_host_filter, unsigned int s3_max_redirects, - bool enable_s3_requests_logging); + bool enable_s3_requests_logging, + bool for_disk_s3); private: ClientFactory(); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 6207ba53bd8..7646e2514a5 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -40,7 +40,7 @@ namespace ErrorCodes struct WriteBufferFromS3::UploadPartTask { Aws::S3::Model::UploadPartRequest req; - bool is_finised = false; + bool is_finished = false; std::string tag; std::exception_ptr exception; }; @@ -48,7 +48,7 @@ struct WriteBufferFromS3::UploadPartTask struct WriteBufferFromS3::PutObjectTask { Aws::S3::Model::PutObjectRequest req; - bool is_finised = false; + bool is_finished = false; std::exception_ptr exception; }; @@ -64,10 +64,10 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , client_ptr(std::move(client_ptr_)) - , upload_part_size(s3_settings_.min_upload_part_size) , s3_settings(s3_settings_) + , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) + , 
upload_part_size(s3_settings_.min_upload_part_size) , schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -218,7 +218,7 @@ void WriteBufferFromS3::writePart() return; } - if (part_tags.size() == S3_WARN_MAX_PARTS) + if (TSA_SUPPRESS_WARNING_FOR_READ(part_tags).size() == S3_WARN_MAX_PARTS) { // Don't throw exception here by ourselves but leave the decision to take by S3 server. LOG_WARNING(log, "Maximum part number in S3 protocol has reached (too many parts). Server may not accept this whole upload."); @@ -231,6 +231,7 @@ void WriteBufferFromS3::writePart() int part_number; { std::lock_guard lock(bg_tasks_mutex); + task = &upload_object_tasks.emplace_back(); ++num_added_bg_tasks; part_number = num_added_bg_tasks; @@ -240,7 +241,7 @@ void WriteBufferFromS3::writePart() auto task_finish_notify = [&, task]() { std::lock_guard lock(bg_tasks_mutex); - task->is_finised = true; + task->is_finished = true; ++num_finished_bg_tasks; /// Notification under mutex is important here. @@ -276,9 +277,11 @@ void WriteBufferFromS3::writePart() else { UploadPartTask task; - fillUploadRequest(task.req, part_tags.size() + 1); + auto & tags = TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags); /// Suppress warning because schedule == false. + + fillUploadRequest(task.req, tags.size() + 1); processUploadRequest(task); - part_tags.push_back(task.tag); + tags.push_back(task.tag); } } @@ -302,6 +305,7 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) if (outcome.IsSuccess()) { task.tag = outcome.GetResult().GetETag(); + std::lock_guard lock(bg_tasks_mutex); /// Protect part_tags from race LOG_TRACE(log, "Writing part finished. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Parts: {}", bucket, key, multipart_upload_id, task.tag, part_tags.size()); } else @@ -312,9 +316,11 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) void WriteBufferFromS3::completeMultipartUpload() { - LOG_TRACE(log, "Completing multipart upload. 
Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size()); + const auto & tags = TSA_SUPPRESS_WARNING_FOR_READ(part_tags); - if (part_tags.empty()) + LOG_TRACE(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size()); + + if (tags.empty()) throw Exception("Failed to complete multipart upload. No parts have uploaded", ErrorCodes::S3_ERROR); Aws::S3::Model::CompleteMultipartUploadRequest req; @@ -323,10 +329,10 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetUploadId(multipart_upload_id); Aws::S3::Model::CompletedMultipartUpload multipart_upload; - for (size_t i = 0; i < part_tags.size(); ++i) + for (size_t i = 0; i < tags.size(); ++i) { Aws::S3::Model::CompletedPart part; - multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); + multipart_upload.AddParts(part.WithETag(tags[i]).WithPartNumber(i + 1)); } req.SetMultipartUpload(multipart_upload); @@ -334,12 +340,12 @@ void WriteBufferFromS3::completeMultipartUpload() auto outcome = client_ptr->CompleteMultipartUpload(req); if (outcome.IsSuccess()) - LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, part_tags.size()); + LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size()); else { throw Exception(ErrorCodes::S3_ERROR, "{} Tags:{}", outcome.GetError().GetMessage(), - fmt::join(part_tags.begin(), part_tags.end(), " ")); + fmt::join(tags.begin(), tags.end(), " ")); } } @@ -364,7 +370,7 @@ void WriteBufferFromS3::makeSinglepartUpload() auto task_notify_finish = [&]() { std::lock_guard lock(bg_tasks_mutex); - put_object_task->is_finised = true; + put_object_task->is_finished = true; /// Notification under mutex is important here. 
/// Othervies, WriteBuffer could be destroyed in between @@ -417,7 +423,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) req.SetContentType("binary/octet-stream"); } -void WriteBufferFromS3::processPutRequest(PutObjectTask & task) +void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { auto outcome = client_ptr->PutObject(task.req); bool with_pool = static_cast(schedule); @@ -431,23 +437,25 @@ void WriteBufferFromS3::waitForReadyBackGroundTasks() { if (schedule) { - std::lock_guard lock(bg_tasks_mutex); + std::unique_lock lock(bg_tasks_mutex); + + /// Suppress warnings because bg_tasks_mutex is actually held, but tsa annotations do not understand std::unique_lock + auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(upload_object_tasks); + + while (!tasks.empty() && tasks.front().is_finished) { - while (!upload_object_tasks.empty() && upload_object_tasks.front().is_finised) + auto & task = tasks.front(); + auto exception = task.exception; + auto tag = std::move(task.tag); + tasks.pop_front(); + + if (exception) { - auto & task = upload_object_tasks.front(); - auto exception = task.exception; - auto tag = std::move(task.tag); - upload_object_tasks.pop_front(); - - if (exception) - { - waitForAllBackGroundTasks(); - std::rethrow_exception(exception); - } - - part_tags.push_back(tag); + waitForAllBackGroundTasksUnlocked(lock); + std::rethrow_exception(exception); } + + TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags).push_back(tag); } } } @@ -457,22 +465,33 @@ void WriteBufferFromS3::waitForAllBackGroundTasks() if (schedule) { std::unique_lock lock(bg_tasks_mutex); - bg_tasks_condvar.wait(lock, [this]() { return num_added_bg_tasks == num_finished_bg_tasks; }); + waitForAllBackGroundTasksUnlocked(lock); + } +} - while (!upload_object_tasks.empty()) +void WriteBufferFromS3::waitForAllBackGroundTasksUnlocked(std::unique_lock & bg_tasks_lock) +{ + if (schedule) + { + bg_tasks_condvar.wait(bg_tasks_lock, [this]() {return
TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); + + /// Suppress warnings because bg_tasks_mutex is actually held, but tsa annotations do not understand std::unique_lock + auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(upload_object_tasks); + while (!tasks.empty()) { - auto & task = upload_object_tasks.front(); + auto & task = tasks.front(); + if (task.exception) std::rethrow_exception(task.exception); - part_tags.push_back(task.tag); + TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags).push_back(task.tag); - upload_object_tasks.pop_front(); + tasks.pop_front(); } if (put_object_task) { - bg_tasks_condvar.wait(lock, [this]() { return put_object_task->is_finised; }); + bg_tasks_condvar.wait(bg_tasks_lock, [this]() { return put_object_task->is_finished; }); if (put_object_task->exception) std::rethrow_exception(put_object_task->exception); } diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 99440654910..ae03299ffbd 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -80,37 +80,39 @@ private: struct PutObjectTask; void fillPutRequest(Aws::S3::Model::PutObjectRequest & req); - void processPutRequest(PutObjectTask & task); + void processPutRequest(const PutObjectTask & task); void waitForReadyBackGroundTasks(); void waitForAllBackGroundTasks(); + void waitForAllBackGroundTasksUnlocked(std::unique_lock & bg_tasks_lock); + + const String bucket; + const String key; + const S3Settings::ReadWriteSettings s3_settings; + const std::shared_ptr client_ptr; + const std::optional> object_metadata; - String bucket; - String key; - std::shared_ptr client_ptr; size_t upload_part_size = 0; - S3Settings::ReadWriteSettings s3_settings; - std::optional> object_metadata; - - /// Buffer to accumulate data. - std::shared_ptr temporary_buffer; + std::shared_ptr temporary_buffer; /// Buffer to accumulate data.
size_t last_part_size = 0; std::atomic total_parts_uploaded = 0; /// Upload in S3 is made in parts. /// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. String multipart_upload_id; - std::vector part_tags; + std::vector TSA_GUARDED_BY(bg_tasks_mutex) part_tags; bool is_prefinalized = false; /// Following fields are for background uploads in thread pool (if specified). /// We use std::function to avoid dependency of Interpreters - ScheduleFunc schedule; - std::unique_ptr put_object_task; - std::list upload_object_tasks; - size_t num_added_bg_tasks = 0; - size_t num_finished_bg_tasks = 0; + const ScheduleFunc schedule; + + std::unique_ptr put_object_task; /// Does not need protection by mutex because of the logic around is_finished field. + std::list TSA_GUARDED_BY(bg_tasks_mutex) upload_object_tasks; + size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp new file mode 100644 index 00000000000..43062546450 --- /dev/null +++ b/src/Interpreters/AggregationUtils.cpp @@ -0,0 +1,113 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +OutputBlockColumns prepareOutputBlockColumns( + const Aggregator::Params & params, + const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions, + const Block & res_header, + Arenas & aggregates_pools, + bool final, + size_t rows) +{ + MutableColumns key_columns(params.keys_size); + MutableColumns aggregate_columns(params.aggregates_size); + MutableColumns final_aggregate_columns(params.aggregates_size); + Aggregator::AggregateColumnsData aggregate_columns_data(params.aggregates_size); + + for (size_t i = 0; i < params.keys_size; ++i) + { + key_columns[i] = 
res_header.safeGetByPosition(i).type->createColumn(); + key_columns[i]->reserve(rows); + } + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (!final) + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + aggregate_columns[i] = res_header.getByName(aggregate_column_name).type->createColumn(); + + /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. + ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + + for (auto & pool : aggregates_pools) + column_aggregate_func.addArena(pool); + + aggregate_columns_data[i] = &column_aggregate_func.getData(); + aggregate_columns_data[i]->reserve(rows); + } + else + { + final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); + final_aggregate_columns[i]->reserve(rows); + + if (aggregate_functions[i]->isState()) + { + /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. + if (auto * column_aggregate_func = typeid_cast(final_aggregate_columns[i].get())) + for (auto & pool : aggregates_pools) + column_aggregate_func->addArena(pool); + + /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. + final_aggregate_columns[i]->forEachSubcolumn( + [&aggregates_pools](auto & subcolumn) + { + if (auto * column_aggregate_func = typeid_cast(subcolumn.get())) + for (auto & pool : aggregates_pools) + column_aggregate_func->addArena(pool); + }); + } + } + } + + if (key_columns.size() != params.keys_size) + throw Exception{"Aggregate. 
Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; + + std::vector raw_key_columns; + raw_key_columns.reserve(key_columns.size()); + for (auto & column : key_columns) + raw_key_columns.push_back(column.get()); + + return { + .key_columns = std::move(key_columns), + .raw_key_columns = std::move(raw_key_columns), + .aggregate_columns = std::move(aggregate_columns), + .final_aggregate_columns = std::move(final_aggregate_columns), + .aggregate_columns_data = std::move(aggregate_columns_data), + }; +} + +Block finalizeBlock(const Aggregator::Params & params, const Block & res_header, OutputBlockColumns && out_cols, bool final, size_t rows) +{ + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; + + Block res = res_header.cloneEmpty(); + + for (size_t i = 0; i < params.keys_size; ++i) + res.getByPosition(i).column = std::move(key_columns[i]); + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + if (final) + res.getByName(aggregate_column_name).column = std::move(final_aggregate_columns[i]); + else + res.getByName(aggregate_column_name).column = std::move(aggregate_columns[i]); + } + + /// Change the size of the columns-constants in the block. 
+ size_t columns = res_header.columns(); + for (size_t i = 0; i < columns; ++i) + if (isColumnConst(*res.getByPosition(i).column)) + res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows); + + return res; +} +} diff --git a/src/Interpreters/AggregationUtils.h b/src/Interpreters/AggregationUtils.h new file mode 100644 index 00000000000..cc37cec0a69 --- /dev/null +++ b/src/Interpreters/AggregationUtils.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +struct OutputBlockColumns +{ + MutableColumns key_columns; + std::vector raw_key_columns; + MutableColumns aggregate_columns; + MutableColumns final_aggregate_columns; + Aggregator::AggregateColumnsData aggregate_columns_data; +}; + + +OutputBlockColumns prepareOutputBlockColumns( + const Aggregator::Params & params, + const Aggregator::AggregateFunctionsPlainPtrs & aggregate_functions, + const Block & res_header, + Arenas & aggregates_pools, + bool final, + size_t rows); + +Block finalizeBlock(const Aggregator::Params & params, const Block & res_header, OutputBlockColumns && out_cols, bool final, size_t rows); +} diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 3340170f71b..ef55f92f63a 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -34,6 +34,8 @@ #include +#include + namespace ProfileEvents { extern const Event ExternalAggregationWritePart; @@ -1587,16 +1589,10 @@ Block Aggregator::convertOneBucketToBlock( bool final, size_t bucket) const { - Block block = prepareBlockAndFill(data_variants, final, method.data.impls[bucket].size(), - [bucket, &method, arena, this] ( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - bool final_) - { - convertToBlockImpl(method, method.data.impls[bucket], - key_columns, aggregate_columns, final_aggregate_columns, arena, final_); - }); + // Used in ConvertingAggregatedToChunksSource -> 
ConvertingAggregatedToChunksTransform (expects single chunk for each bucket_id). + constexpr bool return_single_block = true; + Block block = convertToBlockImpl( + method, method.data.impls[bucket], arena, data_variants.aggregates_pools, final, method.data.impls[bucket].size()); block.info.bucket_num = bucket; return block; @@ -1702,26 +1698,17 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const } -template -void Aggregator::convertToBlockImpl( - Method & method, - Table & data, - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool final) const +template +Aggregator::ConvertToBlockRes +Aggregator::convertToBlockImpl(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows) const { if (data.empty()) - return; + { + auto && out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, rows); + return {finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows)}; + } - if (key_columns.size() != params.keys_size) - throw Exception{"Aggregate. 
Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; - - std::vector raw_key_columns; - raw_key_columns.reserve(key_columns.size()); - for (auto & column : key_columns) - raw_key_columns.push_back(column.get()); + ConvertToBlockRes res; if (final) { @@ -1729,20 +1716,23 @@ void Aggregator::convertToBlockImpl( if (compiled_aggregate_functions_holder) { static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization; - convertToBlockImplFinal(method, data, std::move(raw_key_columns), final_aggregate_columns, arena); + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); } else #endif { - convertToBlockImplFinal(method, data, std::move(raw_key_columns), final_aggregate_columns, arena); + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); } } else { - convertToBlockImplNotFinal(method, data, std::move(raw_key_columns), aggregate_columns); + res = convertToBlockImplNotFinal(method, data, aggregates_pools, rows); } + /// In order to release memory early. data.clearAndShrink(); + + return res; } @@ -1811,38 +1801,9 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu } -template -void NO_INLINE Aggregator::convertToBlockImplFinal( - Method & method, - Table & data, - std::vector key_columns, - MutableColumns & final_aggregate_columns, - Arena * arena) const +template +Block Aggregator::insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena) const { - if constexpr (Method::low_cardinality_optimization) - { - if (data.hasNullKeyData()) - { - key_columns[0]->insertDefault(); - insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns, arena); - } - } - - auto shuffled_key_sizes = method.shuffleKeyColumns(key_columns, key_sizes); - const auto & key_sizes_ref = shuffled_key_sizes ? 
*shuffled_key_sizes : key_sizes; - - PaddedPODArray places; - places.reserve(data.size()); - - data.forEachValue([&](const auto & key, auto & mapped) - { - method.insertKeyIntoColumns(key, key_columns, key_sizes_ref); - places.emplace_back(mapped); - - /// Mark the cell as destroyed so it will not be destroyed in destructor. - mapped = nullptr; - }); - std::exception_ptr exception; size_t aggregate_functions_destroy_index = 0; @@ -1863,7 +1824,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( if (!is_aggregate_function_compiled[i]) continue; - auto & final_aggregate_column = final_aggregate_columns[i]; + auto & final_aggregate_column = out_cols.final_aggregate_columns[i]; final_aggregate_column = final_aggregate_column->cloneResized(places.size()); columns_data.emplace_back(getColumnData(final_aggregate_column.get())); } @@ -1884,7 +1845,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( } } - auto & final_aggregate_column = final_aggregate_columns[aggregate_functions_destroy_index]; + auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index]; size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; /** We increase aggregate_functions_destroy_index because by function contract if insertResultIntoBatch @@ -1898,7 +1859,8 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( bool is_state = aggregate_functions[destroy_index]->isState(); bool destroy_place_after_insert = !is_state; - aggregate_functions[destroy_index]->insertResultIntoBatch(0, places.size(), places.data(), offset, *final_aggregate_column, arena, destroy_place_after_insert); + aggregate_functions[destroy_index]->insertResultIntoBatch( + 0, places.size(), places.data(), offset, *final_aggregate_column, arena, destroy_place_after_insert); } } catch (...) 
@@ -1923,125 +1885,155 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( if (exception) std::rethrow_exception(exception); + + return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size()); } -template -void NO_INLINE Aggregator::convertToBlockImplNotFinal( - Method & method, - Table & data, - std::vector key_columns, - AggregateColumnsData & aggregate_columns) const +template +Aggregator::ConvertToBlockRes NO_INLINE +Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const { - if constexpr (Method::low_cardinality_optimization) + const size_t max_block_size = params.max_block_size; + const bool final = true; + ConvertToBlockRes res; + + std::optional out_cols; + std::optional shuffled_key_sizes; + PaddedPODArray places; + + auto init_out_cols = [&]() { - if (data.hasNullKeyData()) + out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size); + + if constexpr (Method::low_cardinality_optimization) { - key_columns[0]->insertDefault(); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); - - data.getNullKeyData() = nullptr; - } - } - - auto shuffled_key_sizes = method.shuffleKeyColumns(key_columns, key_sizes); - const auto & key_sizes_ref = shuffled_key_sizes ? 
*shuffled_key_sizes : key_sizes; - - data.forEachValue([&](const auto & key, auto & mapped) - { - method.insertKeyIntoColumns(key, key_columns, key_sizes_ref); - - /// reserved, so push_back does not throw exceptions - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(mapped + offsets_of_aggregate_states[i]); - - mapped = nullptr; - }); -} - - -template -Block Aggregator::prepareBlockAndFill( - AggregatedDataVariants & data_variants, - bool final, - size_t rows, - Filler && filler) const -{ - MutableColumns key_columns(params.keys_size); - MutableColumns aggregate_columns(params.aggregates_size); - MutableColumns final_aggregate_columns(params.aggregates_size); - AggregateColumnsData aggregate_columns_data(params.aggregates_size); - - Block res_header = getHeader(final); - - for (size_t i = 0; i < params.keys_size; ++i) - { - key_columns[i] = res_header.safeGetByPosition(i).type->createColumn(); - key_columns[i]->reserve(rows); - } - - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!final) - { - const auto & aggregate_column_name = params.aggregates[i].column_name; - aggregate_columns[i] = res_header.getByName(aggregate_column_name).type->createColumn(); - - /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); - - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func.addArena(pool); - - aggregate_columns_data[i] = &column_aggregate_func.getData(); - aggregate_columns_data[i]->reserve(rows); - } - else - { - final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); - final_aggregate_columns[i]->reserve(rows); - - if (aggregate_functions[i]->isState()) + if (data.hasNullKeyData()) { - /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. 
- if (auto * column_aggregate_func = typeid_cast(final_aggregate_columns[i].get())) - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func->addArena(pool); - - /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. - final_aggregate_columns[i]->forEachSubcolumn([&data_variants](auto & subcolumn) - { - if (auto * column_aggregate_func = typeid_cast(subcolumn.get())) - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func->addArena(pool); - }); + out_cols->key_columns[0]->insertDefault(); + insertAggregatesIntoColumns(data.getNullKeyData(), out_cols->final_aggregate_columns, arena); + data.hasNullKeyData() = false; } } - } - filler(key_columns, aggregate_columns_data, final_aggregate_columns, final); + shuffled_key_sizes = method.shuffleKeyColumns(out_cols->raw_key_columns, key_sizes); - Block res = res_header.cloneEmpty(); + places.reserve(max_block_size); + }; - for (size_t i = 0; i < params.keys_size; ++i) - res.getByPosition(i).column = std::move(key_columns[i]); + // should be invoked at least once, because null data might be the only content of the `data` + init_out_cols(); - for (size_t i = 0; i < params.aggregates_size; ++i) + data.forEachValue( + [&](const auto & key, auto & mapped) + { + if (!out_cols.has_value()) + init_out_cols(); + + const auto & key_sizes_ref = shuffled_key_sizes ? *shuffled_key_sizes : key_sizes; + method.insertKeyIntoColumns(key, out_cols->raw_key_columns, key_sizes_ref); + places.emplace_back(mapped); + + /// Mark the cell as destroyed so it will not be destroyed in destructor. 
+ mapped = nullptr; + + if constexpr (!return_single_block) + { + if (places.size() >= max_block_size) + { + res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena)); + places.clear(); + out_cols.reset(); + } + } + }); + + if constexpr (return_single_block) { - const auto & aggregate_column_name = params.aggregates[i].column_name; - if (final) - res.getByName(aggregate_column_name).column = std::move(final_aggregate_columns[i]); - else - res.getByName(aggregate_column_name).column = std::move(aggregate_columns[i]); + return insertResultsIntoColumns(places, std::move(out_cols.value()), arena); } + else + { + if (out_cols.has_value()) + res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena)); + return res; + } +} - /// Change the size of the columns-constants in the block. - size_t columns = res_header.columns(); - for (size_t i = 0; i < columns; ++i) - if (isColumnConst(*res.getByPosition(i).column)) - res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows); +template +Aggregator::ConvertToBlockRes NO_INLINE +Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const +{ + const size_t max_block_size = params.max_block_size; + const bool final = false; + ConvertToBlockRes res; + std::optional out_cols; + std::optional shuffled_key_sizes; + + auto init_out_cols = [&]() + { + out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size); + + if constexpr (Method::low_cardinality_optimization) + { + if (data.hasNullKeyData()) + { + out_cols->raw_key_columns[0]->insertDefault(); + + for (size_t i = 0; i < params.aggregates_size; ++i) + out_cols->aggregate_columns_data[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); + + data.getNullKeyData() = nullptr; + data.hasNullKeyData() = false; + } + } + + shuffled_key_sizes = 
method.shuffleKeyColumns(out_cols->raw_key_columns, key_sizes); + }; + + // should be invoked at least once, because null data might be the only content of the `data` + init_out_cols(); + + size_t rows_in_current_block = 0; + + data.forEachValue( + [&](const auto & key, auto & mapped) + { + if (!out_cols.has_value()) + init_out_cols(); + + const auto & key_sizes_ref = shuffled_key_sizes ? *shuffled_key_sizes : key_sizes; + method.insertKeyIntoColumns(key, out_cols->raw_key_columns, key_sizes_ref); + + /// reserved, so push_back does not throw exceptions + for (size_t i = 0; i < params.aggregates_size; ++i) + out_cols->aggregate_columns_data[i]->push_back(mapped + offsets_of_aggregate_states[i]); + + mapped = nullptr; + + ++rows_in_current_block; + + if constexpr (!return_single_block) + { + if (rows_in_current_block >= max_block_size) + { + res.emplace_back(finalizeBlock(params, getHeader(final), std::move(out_cols.value()), final, rows_in_current_block)); + out_cols.reset(); + rows_in_current_block = 0; + } + } + }); + + if constexpr (return_single_block) + { + return finalizeBlock(params, getHeader(final), std::move(out_cols).value(), final, rows_in_current_block); + } + else + { + if (rows_in_current_block) + res.emplace_back(finalizeBlock(params, getHeader(final), std::move(out_cols).value(), final, rows_in_current_block)); + return res; + } return res; } @@ -2105,39 +2097,35 @@ void Aggregator::createStatesAndFillKeyColumnsWithSingleKey( Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const { size_t rows = 1; + auto && out_cols + = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), data_variants.aggregates_pools, final, rows); + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; - auto filler = [&data_variants, this]( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - 
MutableColumns & final_aggregate_columns, - bool final_) + if (data_variants.type == AggregatedDataVariants::Type::without_key || params.overflow_row) { - if (data_variants.type == AggregatedDataVariants::Type::without_key || params.overflow_row) + AggregatedDataWithoutKey & data = data_variants.without_key; + + if (!data) + throw Exception("Wrong data variant passed.", ErrorCodes::LOGICAL_ERROR); + + if (!final) { - AggregatedDataWithoutKey & data = data_variants.without_key; - - if (!data) - throw Exception("Wrong data variant passed.", ErrorCodes::LOGICAL_ERROR); - - if (!final_) - { - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]); - data = nullptr; - } - else - { - /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. - insertAggregatesIntoColumns(data, final_aggregate_columns, data_variants.aggregates_pool); - } - - if (params.overflow_row) - for (size_t i = 0; i < params.keys_size; ++i) - key_columns[i]->insertDefault(); + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_columns_data[i]->push_back(data + offsets_of_aggregate_states[i]); + data = nullptr; + } + else + { + /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. 
+ insertAggregatesIntoColumns(data, final_aggregate_columns, data_variants.aggregates_pool); } - }; - Block block = prepareBlockAndFill(data_variants, final, rows, filler); + if (params.overflow_row) + for (size_t i = 0; i < params.keys_size; ++i) + key_columns[i]->insertDefault(); + } + + Block block = finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows); if (is_overflows) block.info.is_overflows = true; @@ -2148,29 +2136,22 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va return block; } -Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const +template +Aggregator::ConvertToBlockRes +Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const { - size_t rows = data_variants.sizeWithoutOverflowRow(); + const size_t rows = data_variants.sizeWithoutOverflowRow(); +#define M(NAME) \ + else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ + { \ + return convertToBlockImpl( \ + *data_variants.NAME, data_variants.NAME->data, data_variants.aggregates_pool, data_variants.aggregates_pools, final, rows); \ + } - auto filler = [&data_variants, this]( - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - bool final_) - { - #define M(NAME) \ - else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ - convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ - key_columns, aggregate_columns, final_aggregate_columns, data_variants.aggregates_pool, final_); - - if (false) {} // NOLINT - APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) - #undef M - else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - }; - - return prepareBlockAndFill(data_variants, final, rows, filler); + if (false) {} // NOLINT + APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) +#undef M + else throw Exception("Unknown aggregated data 
variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); } @@ -2292,7 +2273,7 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b if (data_variants.type != AggregatedDataVariants::Type::without_key) { if (!data_variants.isTwoLevel()) - blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final)); + blocks.splice(blocks.end(), prepareBlockAndFillSingleLevel(data_variants, final)); else blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, thread_pool.get())); } @@ -3044,9 +3025,15 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) Block block; if (result.type == AggregatedDataVariants::Type::without_key || is_overflows) + { block = prepareBlockAndFillWithoutKey(result, final, is_overflows); + } else - block = prepareBlockAndFillSingleLevel(result, final); + { + // Used during memory efficient merging (SortingAggregatedTransform expects single chunk for each bucket_id). + constexpr bool return_single_block = true; + block = prepareBlockAndFillSingleLevel(result, final); + } /// NOTE: two-level data is not possible here - chooseAggregationMethod chooses only among single-level methods. 
if (!final) @@ -3247,4 +3234,6 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons } +template Aggregator::ConvertToBlockRes +Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index ce63c24969a..1d317e0a93a 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1,8 +1,9 @@ #pragma once -#include -#include #include +#include +#include +#include #include @@ -872,6 +873,7 @@ using ManyAggregatedDataVariantsPtr = std::shared_ptr - void convertToBlockImpl( - Method & method, - Table & data, - MutableColumns & key_columns, - AggregateColumnsData & aggregate_columns, - MutableColumns & final_aggregate_columns, - Arena * arena, - bool final) const; + template + using ConvertToBlockRes = std::conditional_t; + + template + ConvertToBlockRes + convertToBlockImpl(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool final, size_t rows) const; template void insertAggregatesIntoColumns( @@ -1293,27 +1296,16 @@ private: MutableColumns & final_aggregate_columns, Arena * arena) const; - template - void convertToBlockImplFinal( - Method & method, - Table & data, - std::vector key_columns, - MutableColumns & final_aggregate_columns, - Arena * arena) const; + template + Block insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena) const; - template - void convertToBlockImplNotFinal( - Method & method, - Table & data, - std::vector key_columns, - AggregateColumnsData & aggregate_columns) const; + template + ConvertToBlockRes + convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows) const; - template - Block prepareBlockAndFill( - AggregatedDataVariants & data_variants, - bool final, - size_t rows, - Filler && filler) const; + template + ConvertToBlockRes + 
convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t rows) const; template Block convertOneBucketToBlock( @@ -1331,9 +1323,11 @@ private: std::atomic * is_cancelled = nullptr) const; Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; - Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; + template + ConvertToBlockRes prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; + template BlocksList prepareBlocksAndFillTwoLevelImpl( AggregatedDataVariants & data_variants, diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index f4dbbaec16d..c22863ef8e5 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } InterpreterRenameQuery::InterpreterRenameQuery(const ASTPtr & query_ptr_, ContextPtr context_) @@ -31,11 +32,11 @@ BlockIO InterpreterRenameQuery::execute() if (!rename.cluster.empty()) { DDLQueryOnClusterParams params; - params.access_to_check = getRequiredAccess(); + params.access_to_check = getRequiredAccess(rename.database ? RenameType::RenameDatabase : RenameType::RenameTable); return executeDDLQueryOnCluster(query_ptr, getContext(), params); } - getContext()->checkAccess(getRequiredAccess()); + getContext()->checkAccess(getRequiredAccess(rename.database ? 
RenameType::RenameDatabase : RenameType::RenameTable)); String path = getContext()->getPath(); String current_database = getContext()->getCurrentDatabase(); @@ -165,18 +166,30 @@ BlockIO InterpreterRenameQuery::executeToDatabase(const ASTRenameQuery &, const return {}; } -AccessRightsElements InterpreterRenameQuery::getRequiredAccess() const +AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRenameQuery::RenameType type) const { AccessRightsElements required_access; const auto & rename = query_ptr->as(); for (const auto & elem : rename.elements) { - required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.from.database, elem.from.table); - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.database, elem.to.table); - if (rename.exchange) + if (type == RenameType::RenameTable) { - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.database, elem.from.table); - required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.database, elem.to.table); + required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.from.database, elem.from.table); + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.database, elem.to.table); + if (rename.exchange) + { + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.database, elem.from.table); + required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.database, elem.to.table); + } + } + else if (type == RenameType::RenameDatabase) + { + required_access.emplace_back(AccessType::SELECT | AccessType::DROP_DATABASE, elem.from.database); + required_access.emplace_back(AccessType::CREATE_DATABASE | AccessType::INSERT, elem.to.database); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown type of rename query"); } } return required_access; diff --git 
a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h index 194f6266634..6141e8c1585 100644 --- a/src/Interpreters/InterpreterRenameQuery.h +++ b/src/Interpreters/InterpreterRenameQuery.h @@ -63,7 +63,13 @@ private: BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards); BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions); - AccessRightsElements getRequiredAccess() const; + enum class RenameType + { + RenameTable, + RenameDatabase + }; + + AccessRightsElements getRequiredAccess(RenameType type) const; ASTPtr query_ptr; bool renamed_instead_of_exchange{false}; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b67171aac5a..c73db82a27b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1763,7 +1763,7 @@ static void executeMergeAggregatedImpl( * but it can work more slowly. 
*/ - Aggregator::Params params(keys, aggregates, overflow_row, settings.max_threads); + Aggregator::Params params(keys, aggregates, overflow_row, settings.max_threads, settings.max_block_size); auto merging_aggregated = std::make_unique( query_plan.getCurrentDataStream(), @@ -2359,6 +2359,7 @@ static Aggregator::Params getAggregatorParams( settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression, + settings.max_block_size, /* only_merge */ false, stats_collecting_params }; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 106b1611f80..a6d88c7d28b 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -753,7 +753,7 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora "if you want to clean the data and drop this replica", ErrorCodes::TABLE_WAS_NOT_DROPPED); /// NOTE it's not atomic: replica may become active after this check, but before dropReplica(...) - /// However, the main usecase is to drop dead replica, which cannot become active. + /// However, the main use case is to drop dead replica, which cannot become active. /// This check prevents only from accidental drop of some other replica. 
if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active")) throw Exception("Can't drop replica: " + query.replica + ", because it's active", diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 03de8aecc92..664cda74522 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,16 +1,17 @@ -#include -#include #include -#include #include +#include #include #include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include namespace DB @@ -30,7 +31,12 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) else if (ast->as() || ast->as()) visitIdentifier(ast); else - visitChildren(ast); + { + if (auto * describe_query = dynamic_cast(ast.get()); describe_query && describe_query->table_expression) + visitChildren(describe_query->table_expression); + else + visitChildren(ast); + } } diff --git a/src/Interpreters/TreeCNFConverter.cpp b/src/Interpreters/TreeCNFConverter.cpp index 1f61c88ddd0..8812e90a5f0 100644 --- a/src/Interpreters/TreeCNFConverter.cpp +++ b/src/Interpreters/TreeCNFConverter.cpp @@ -349,7 +349,7 @@ CNFQuery & CNFQuery::pullNotOutFunctions() return *this; } -CNFQuery & CNFQuery::pushNotInFuntions() +CNFQuery & CNFQuery::pushNotInFunctions() { transformAtoms([](const AtomicFormula & atom) -> AtomicFormula { diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index a5d42e6b989..70c8990f74a 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -133,7 +133,7 @@ public: /// Converts != -> NOT =; <,>= -> (NOT) <; >,<= -> (NOT) <= for simpler matching CNFQuery & pullNotOutFunctions(); /// Revert pullNotOutFunctions actions - CNFQuery & pushNotInFuntions(); + CNFQuery & pushNotInFunctions(); /// (a OR b OR ...) AND (NOT a OR b OR ...) -> (b OR ...) 
CNFQuery & reduce(); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index eaf59731967..3f7e141db3e 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -154,7 +154,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) continue; } } - /// don't optimise functions that shadow any of it's arguments, e.g.: + /// don't optimize functions that shadow any of its arguments, e.g.: /// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy; if (!function->alias.empty()) { @@ -632,7 +632,7 @@ bool convertQueryToCNF(ASTSelectQuery * select_query) if (!cnf_form) return false; - cnf_form->pushNotInFuntions(); + cnf_form->pushNotInFunctions(); select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf_form); return true; } diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 16ff7f8b6c3..7954547c070 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -99,7 +99,7 @@ using TreeRewriterResultPtr = std::shared_ptr; /// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf /// -/// Optimises AST tree and collect information for further expression analysis in ExpressionAnalyzer. +/// Optimizes AST tree and collect information for further expression analysis in ExpressionAnalyzer.
/// Result AST has the following invariants: /// * all aliases are substituted /// * qualified names are translated diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 83bdcfeb2e1..234b99167bb 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -170,7 +170,7 @@ void WhereConstraintsOptimizer::perform() return replaceTermsToConstants(atom, compare_graph); }) .reduce() - .pushNotInFuntions(); + .pushNotInFunctions(); if (optimize_append_index) AddIndexConstraintsOptimizer(metadata_snapshot).perform(cnf); diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index 933fc09d3e4..b1502cc4558 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -46,6 +46,8 @@ void OwnSplitChannel::log(const Poco::Message & msg) void OwnSplitChannel::tryLogSplit(const Poco::Message & msg) { + LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); + try { logSplit(msg); @@ -62,8 +64,6 @@ void OwnSplitChannel::tryLogSplit(const Poco::Message & msg) /// but let's log it into the stderr at least. catch (...) 
{ - LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); - const std::string & exception_message = getCurrentExceptionMessage(true); const std::string & message = msg.getText(); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index f0374d2419b..5a5326091e6 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -182,6 +182,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B transform_params->params.min_free_disk_space, transform_params->params.compile_aggregate_expressions, transform_params->params.min_count_to_compile_aggregate_expression, + transform_params->params.max_block_size, /* only_merge */ false, transform_params->params.stats_collecting_params}; auto transform_params_for_set = std::make_shared(src_header, std::move(params_for_set), final); @@ -376,16 +377,15 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B }); /// We add the explicit resize here, but not in case of aggregating in order, since AIO don't use two-level hash tables and thus returns only buckets with bucket_number = -1. - pipeline.resize(should_produce_results_in_order_of_bucket_number ? 1 : pipeline.getNumStreams(), true /* force */); + pipeline.resize(should_produce_results_in_order_of_bucket_number ? 1 : params.max_threads, true /* force */); aggregating = collector.detachProcessors(0); } else { - pipeline.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, transform_params); - }); + pipeline.addSimpleTransform([&](const Block & header) { return std::make_shared(header, transform_params); }); + + pipeline.resize(should_produce_results_in_order_of_bucket_number ? 
1 : params.max_threads, false /* force */); aggregating = collector.detachProcessors(0); } diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index 7da2b5252f5..e3d29256c23 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -108,7 +108,7 @@ void DistinctStep::transformPipeline(QueryPipelineBuilder & pipeline, const Buil return; } /// final distinct for sorted stream (sorting inside and among chunks) - if (input_stream.sort_mode == DataStream::SortMode::Stream) + if (input_stream.sort_scope == DataStream::SortScope::Global) { assert(input_stream.has_single_port); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index a66582ff06d..c5bd64d66be 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -31,18 +31,18 @@ public: /// QueryPipeline has single port. Totals or extremes ports are not counted. bool has_single_port = false; - /// How data is sorted. - enum class SortMode + /// Sorting scope + enum class SortScope { None, Chunk, /// Separate chunks are sorted - Port, /// Data from each port is sorted - Stream, /// Data is globally sorted + Stream, /// Each data stream is sorted + Global, /// Data is globally sorted }; /// It is not guaranteed that header has columns from sort_description.
SortDescription sort_description = {}; - SortMode sort_mode = SortMode::None; + SortScope sort_scope = SortScope::None; /// Things which may be added: /// * limit @@ -54,7 +54,7 @@ public: return distinct_columns == other.distinct_columns && has_single_port == other.has_single_port && sort_description == other.sort_description - && (sort_description.empty() || sort_mode == other.sort_mode); + && (sort_description.empty() || sort_scope == other.sort_scope); } bool hasEqualHeaderWith(const DataStream & other) const diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 9b9797b6540..64ad2ec5626 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -29,7 +29,7 @@ DataStream ITransformingStep::createOutputStream( if (stream_traits.preserves_sorting) { output_stream.sort_description = input_stream.sort_description; - output_stream.sort_mode = input_stream.sort_mode; + output_stream.sort_scope = input_stream.sort_scope; } return output_stream; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index c27c0c0d318..9b4c0a6e920 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -333,8 +333,8 @@ static void explainStep( { if (step.hasOutputStream()) { - settings.out << prefix << "Sorting (" << step.getOutputStream().sort_mode << ")"; - if (step.getOutputStream().sort_mode != DataStream::SortMode::None) + settings.out << prefix << "Sorting (" << step.getOutputStream().sort_scope << ")"; + if (step.getOutputStream().sort_scope != DataStream::SortScope::None) { settings.out << ": "; dumpSortDescription(step.getOutputStream().sort_description, settings.out); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1f6c6ee2a3f..6cae86c9717 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -143,9 +143,9 @@ ReadFromMergeTree::ReadFromMergeTree( { auto const & settings = context->getSettingsRef(); if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.getInputOrderInfo()) - output_stream->sort_mode = DataStream::SortMode::Port; + output_stream->sort_scope = DataStream::SortScope::Stream; else - output_stream->sort_mode = DataStream::SortMode::Chunk; + output_stream->sort_scope = DataStream::SortScope::Chunk; } output_stream->sort_description = std::move(sort_description); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 38e02eebd44..9bad6a02d53 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -55,7 +55,7 @@ SortingStep::SortingStep( { /// TODO: check input_stream is partially sorted by the same description. output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; + output_stream->sort_scope = DataStream::SortScope::Global; } SortingStep::SortingStep( @@ -73,7 +73,7 @@ SortingStep::SortingStep( { /// TODO: check input_stream is sorted by prefix_description. output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; + output_stream->sort_scope = DataStream::SortScope::Global; } SortingStep::SortingStep( @@ -89,14 +89,14 @@ SortingStep::SortingStep( { /// TODO: check input_stream is partially sorted (each port) by the same description. 
output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; + output_stream->sort_scope = DataStream::SortScope::Global; } void SortingStep::updateOutputStream() { output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; + output_stream->sort_scope = DataStream::SortScope::Global; } void SortingStep::updateLimit(size_t limit_) @@ -256,23 +256,23 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build return; } - const auto input_sort_mode = input_streams.front().sort_mode; + const auto input_sort_mode = input_streams.front().sort_scope; const SortDescription & input_sort_desc = input_streams.front().sort_description; if (optimize_sorting_by_input_stream_properties) { /// skip sorting if stream is already sorted - if (input_sort_mode == DataStream::SortMode::Stream && input_sort_desc.hasPrefix(result_description)) + if (input_sort_mode == DataStream::SortScope::Global && input_sort_desc.hasPrefix(result_description)) return; /// merge sorted - if (input_sort_mode == DataStream::SortMode::Port && input_sort_desc.hasPrefix(result_description)) + if (input_sort_mode == DataStream::SortScope::Stream && input_sort_desc.hasPrefix(result_description)) { mergingSorted(pipeline, result_description, limit); return; } /// if chunks already sorted according to result_sort_desc, then we can skip chunk sorting - if (input_sort_mode == DataStream::SortMode::Chunk && input_sort_desc.hasPrefix(result_description)) + if (input_sort_mode == DataStream::SortScope::Chunk && input_sort_desc.hasPrefix(result_description)) { const bool skip_partial_sort = true; fullSort(pipeline, result_description, limit, skip_partial_sort); diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index 
0d160b8d32d..6a813a770cf 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -38,7 +38,8 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm( settings.max_threads, settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression); + settings.min_count_to_compile_aggregate_expression, + settings.max_block_size); aggregator = std::make_unique(header, params); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 2c7a4e23119..c2de0c3a23a 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -182,7 +182,8 @@ void AggregatingInOrderTransform::consume(Chunk chunk) if (cur_block_size >= max_block_size || cur_block_bytes + current_memory_usage >= max_block_bytes) { if (group_by_key) - group_by_block = params->aggregator.prepareBlockAndFillSingleLevel(variants, /* final= */ false); + group_by_block + = params->aggregator.prepareBlockAndFillSingleLevel(variants, /* final= */ false); cur_block_bytes += current_memory_usage; finalizeCurrentChunk(std::move(chunk), key_end); return; @@ -293,7 +294,8 @@ void AggregatingInOrderTransform::generate() if (cur_block_size && is_consume_finished) { if (group_by_key) - group_by_block = params->aggregator.prepareBlockAndFillSingleLevel(variants, /* final= */ false); + group_by_block + = params->aggregator.prepareBlockAndFillSingleLevel(variants, /* final= */ false); else params->aggregator.addSingleKeyToAggregateColumns(variants, res_aggregate_columns); variants.invalidate(); diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 7f5896f5e97..4e55081ca48 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ 
b/src/Processors/Transforms/AggregatingTransform.cpp @@ -203,7 +203,7 @@ public: { auto & output = outputs.front(); - if (finished && !has_input) + if (finished && single_level_chunks.empty()) { output.finish(); return Status::Finished; @@ -230,7 +230,7 @@ public: if (!processors.empty()) return Status::ExpandPipeline; - if (has_input) + if (!single_level_chunks.empty()) return preparePushToOutput(); /// Single level case. @@ -244,11 +244,14 @@ public: private: IProcessor::Status preparePushToOutput() { - auto & output = outputs.front(); - output.push(std::move(current_chunk)); - has_input = false; + if (single_level_chunks.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Some ready chunks expected"); - if (finished) + auto & output = outputs.front(); + output.push(std::move(single_level_chunks.back())); + single_level_chunks.pop_back(); + + if (finished && single_level_chunks.empty()) { output.finish(); return Status::Finished; @@ -268,17 +271,17 @@ private: { auto chunk = input.pull(); auto bucket = getInfoFromChunk(chunk)->bucket_num; - chunks[bucket] = std::move(chunk); + two_level_chunks[bucket] = std::move(chunk); } } if (!shared_data->is_bucket_processed[current_bucket_num]) return Status::NeedData; - if (!chunks[current_bucket_num]) + if (!two_level_chunks[current_bucket_num]) return Status::NeedData; - output.push(std::move(chunks[current_bucket_num])); + output.push(std::move(two_level_chunks[current_bucket_num])); ++current_bucket_num; if (current_bucket_num == NUM_BUCKETS) @@ -298,27 +301,16 @@ private: size_t num_threads; bool is_initialized = false; - bool has_input = false; bool finished = false; - Chunk current_chunk; + Chunks single_level_chunks; UInt32 current_bucket_num = 0; static constexpr Int32 NUM_BUCKETS = 256; - std::array chunks; + std::array two_level_chunks; Processors processors; - void setCurrentChunk(Chunk chunk) - { - if (has_input) - throw Exception("Current chunk was already set in " - 
"ConvertingAggregatedToChunksTransform.", ErrorCodes::LOGICAL_ERROR); - - has_input = true; - current_chunk = std::move(chunk); - } - void initialize() { is_initialized = true; @@ -339,7 +331,7 @@ private: auto block = params->aggregator.prepareBlockAndFillWithoutKey( *first, params->final, first->type != AggregatedDataVariants::Type::without_key); - setCurrentChunk(convertToChunk(block)); + single_level_chunks.emplace_back(convertToChunk(block)); } } @@ -364,9 +356,10 @@ private: else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - auto block = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); + auto blocks = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); + for (auto & block : blocks) + single_level_chunks.emplace_back(convertToChunk(block)); - setCurrentChunk(convertToChunk(block)); finished = true; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index fab810a1e49..4aebcd6f6ab 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -41,8 +41,9 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(read_settings_.remote_fs_buffer_size) + size_t read_until_position_, + bool use_external_buffer_) + : BufferWithOwnMemory(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) , builder(createHDFSBuilder(hdfs_uri_, config_)) @@ -132,10 +133,12 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( const String & hdfs_file_path_, const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, - size_t read_until_position_) + size_t read_until_position_, + bool use_external_buffer_) : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) , impl(std::make_unique( - hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_)) + hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_)) + , use_external_buffer(use_external_buffer_) { } @@ -146,7 +149,18 @@ size_t ReadBufferFromHDFS::getFileSize() bool ReadBufferFromHDFS::nextImpl() { - impl->position() = impl->buffer().begin() + offset(); + if (use_external_buffer) + { + impl->set(internal_buffer.begin(), internal_buffer.size()); + assert(working_buffer.begin() != nullptr); + assert(!internal_buffer.empty()); + } + else + { + impl->position() = impl->buffer().begin() + offset(); + assert(!impl->hasPendingData()); + } + auto result = impl->next(); if (result) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 41493c31882..c3b859f0566 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -29,7 +29,8 @@ public: const String & hdfs_file_path_, const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, - size_t read_until_position_ = 0); + size_t read_until_position_ = 0, + bool use_external_buffer = false); ~ReadBufferFromHDFS() override; @@ -49,6 +50,7 @@ public: private: std::unique_ptr impl; + bool use_external_buffer; }; } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 96987b0f60e..06ce4fb308d 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ 
b/src/Storages/Kafka/StorageKafka.cpp @@ -810,7 +810,7 @@ void registerStorageKafka(StorageFactory & factory) /** Arguments of engine is following: * - Kafka broker list * - List of topics - * - Group ID (may be a constraint expression with a string result) + * - Group ID (may be a constant expression with a string result) * - Message format (string) * - Row delimiter * - Schema (optional, if the format supports it) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 32c2c09a392..a52de88321c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -532,13 +532,34 @@ void IMergeTreeDataPart::removeIfNeeded() LOG_TRACE(storage.log, "Removed part from old location {}", path); } } - catch (...) + catch (const Exception & ex) { + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("while removing part {} with path {}", name, path)); + + /// In this case we want to avoid assertions, because such errors are unavoidable in setup + /// with zero-copy replication. + if (const auto * keeper_exception = dynamic_cast(&ex)) + { + if (Coordination::isHardwareError(keeper_exception->code)) + return; + } + /// FIXME If part it temporary, then directory will not be removed for 1 day (temporary_directories_lifetime). /// If it's tmp_merge_ or tmp_fetch_, /// then all future attempts to execute part producing operation will fail with "directory already exists". - /// Seems like it's especially important for remote disks, because removal may fail due to network issues. - tryLogCurrentException(__PRETTY_FUNCTION__, "while removiong path: " + path); + assert(!is_temp); + assert(state != MergeTreeDataPartState::DeleteOnDestroy); + assert(state != MergeTreeDataPartState::Temporary); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("while removing part {} with path {}", name, path)); + + /// FIXME If part is temporary, then directory will not be removed for 1 day (temporary_directories_lifetime). + /// If it's tmp_merge_ or tmp_fetch_, + /// then all future attempts to execute part producing operation will fail with "directory already exists". + /// + /// For remote disks this issue is really frequent, so we don't abort server here assert(!is_temp); assert(state != MergeTreeDataPartState::DeleteOnDestroy); assert(state != MergeTreeDataPartState::Temporary); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a8babeb59c0..b7b68367e98 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1046,29 +1046,43 @@ void MergeTreeData::loadDataPartsFromDisk( throw; broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("while loading part {} on path {}", part->name, part_path)); } catch (...) { broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("while loading part {} on path {}", part->name, part_path)); } /// Ignore broken parts that can appear as a result of hard server restart. if (broken) { - /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist - size_t size_of_part = data_part_storage->calculateTotalSizeOnDisk(); + std::optional size_of_part; + try + { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_of_part = data_part_storage->calculateTotalSizeOnDisk(); + } + catch (...)
+ { + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("while calculating part size {} on path {}", part->name, part_path)); + } + + std::string part_size_str = "failed to calculate size"; + if (size_of_part.has_value()) + part_size_str = formatReadableSizeWithBinarySuffix(*size_of_part); + LOG_ERROR(log, "Detaching broken part {}{} (size: {}). " - "If it happened after update, it is likely because of backward incompability. " + "If it happened after update, it is likely because of backward incompatibility. " "You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); + getFullPathOnDisk(part_disk_ptr), part_name, part_size_str); std::lock_guard loading_lock(mutex); broken_parts_to_detach.push_back(part); ++suspicious_broken_parts; - suspicious_broken_parts_bytes += size_of_part; + if (size_of_part.has_value()) + suspicious_broken_parts_bytes += *size_of_part; return; } if (!part->index_granularity_info.is_adaptive) @@ -1177,14 +1191,10 @@ void MergeTreeData::loadDataPartsFromDisk( void MergeTreeData::loadDataPartsFromWAL( DataPartsVector & /* broken_parts_to_detach */, DataPartsVector & duplicate_parts_to_remove, - MutableDataPartsVector & parts_from_wal, - DataPartsLock & part_lock) + MutableDataPartsVector & parts_from_wal) { for (auto & part : parts_from_wal) { - if (getActiveContainingPart(part->info, DataPartState::Active, part_lock)) - continue; - part->modification_time = time(nullptr); /// Assume that all parts are Active, covered parts will be detected and marked as Outdated later part->setState(DataPartState::Active); @@ -1212,7 +1222,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto metadata_snapshot = getInMemoryMetadataPtr(); const auto settings = getSettings(); - MutableDataPartsVector parts_from_wal; Strings part_file_names; auto disks = getStoragePolicy()->getDisks(); @@ -1269,16 +1278,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) 
/// Collect part names by disk. std::map>> disk_part_map; - std::map disk_wal_part_map; ThreadPool pool(disks.size()); - std::mutex wal_init_lock; + for (const auto & disk_ptr : disks) { if (disk_ptr->isBroken()) continue; auto & disk_parts = disk_part_map[disk_ptr->getName()]; - auto & disk_wal_parts = disk_wal_part_map[disk_ptr->getName()]; pool.scheduleOrThrowOnError([&, disk_ptr]() { @@ -1291,34 +1298,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) disk_parts.emplace_back(std::make_pair(it->name(), disk_ptr)); - else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal) - { - std::lock_guard lock(wal_init_lock); - if (write_ahead_log != nullptr) - throw Exception( - "There are multiple WAL files appeared in current storage policy. You need to resolve this manually", - ErrorCodes::CORRUPTED_DATA); - - write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); - for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext())) - disk_wal_parts.push_back(std::move(part)); - } - else if (settings->in_memory_parts_enable_wal) - { - MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); - for (auto && part : wal.restore(metadata_snapshot, getContext())) - disk_wal_parts.push_back(std::move(part)); - } } }); } - pool.wait(); - for (auto & [_, disk_wal_parts] : disk_wal_part_map) - parts_from_wal.insert( - parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end())); - size_t num_parts = 0; std::queue>> parts_queue; for (auto & [_, disk_parts] : disk_part_map) @@ -1332,13 +1316,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto part_lock = lockParts(); data_parts_indexes.clear(); - if (num_parts == 0 && parts_from_wal.empty()) - { - resetObjectColumnsFromActiveParts(part_lock); - LOG_DEBUG(log, "There are no data parts"); - return; - } - 
DataPartsVector broken_parts_to_detach; DataPartsVector duplicate_parts_to_remove; @@ -1346,8 +1323,65 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) loadDataPartsFromDisk( broken_parts_to_detach, duplicate_parts_to_remove, pool, num_parts, parts_queue, skip_sanity_checks, settings); - if (!parts_from_wal.empty()) - loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock); + if (settings->in_memory_parts_enable_wal) + { + std::map disk_wal_part_map; + + std::mutex wal_init_lock; + for (const auto & disk_ptr : disks) + { + if (disk_ptr->isBroken()) + continue; + + auto & disk_wal_parts = disk_wal_part_map[disk_ptr->getName()]; + + pool.scheduleOrThrowOnError([&, disk_ptr]() + { + for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) + { + if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + continue; + + if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME) + { + std::lock_guard lock(wal_init_lock); + if (write_ahead_log != nullptr) + throw Exception( + "There are multiple WAL files appeared in current storage policy. 
You need to resolve this manually", + ErrorCodes::CORRUPTED_DATA); + + write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); + for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext(), part_lock)) + disk_wal_parts.push_back(std::move(part)); + } + else + { + MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); + for (auto && part : wal.restore(metadata_snapshot, getContext(), part_lock)) + disk_wal_parts.push_back(std::move(part)); + } + } + }); + } + + pool.wait(); + + MutableDataPartsVector parts_from_wal; + for (auto & [_, disk_wal_parts] : disk_wal_part_map) + parts_from_wal.insert( + parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end())); + + loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal); + + num_parts += parts_from_wal.size(); + } + + if (num_parts == 0) + { + resetObjectColumnsFromActiveParts(part_lock); + LOG_DEBUG(log, "There are no data parts"); + return; + } for (auto & part : broken_parts_to_detach) { @@ -1410,7 +1444,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) continue; } - /// Check if CSNs were witten after committing transaction, update and write if needed. + /// Check if CSNs were written after committing transaction, update and write if needed. 
bool version_updated = false; chassert(!version.creation_tid.isEmpty()); if (!part->version.creation_csn) @@ -1833,18 +1867,18 @@ size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts, bool throw_on_error, NameSet * parts_failed_to_delete) { - NameSet part_names_successeded; + NameSet part_names_succeed; - auto get_failed_parts = [&part_names_successeded, &parts_failed_to_delete, &parts] () + auto get_failed_parts = [&part_names_succeed, &parts_failed_to_delete, &parts] () { - if (part_names_successeded.size() == parts.size()) + if (part_names_succeed.size() == parts.size()) return; if (parts_failed_to_delete) { for (const auto & part : parts) { - if (!part_names_successeded.contains(part->name)) + if (!part_names_succeed.contains(part->name)) parts_failed_to_delete->insert(part->name); } } @@ -1852,7 +1886,7 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts, bool try { - clearPartsFromFilesystemImpl(parts, &part_names_successeded); + clearPartsFromFilesystemImpl(parts, &part_names_succeed); get_failed_parts(); } catch (...) 
@@ -1864,7 +1898,7 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts, bool } } -void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_to_remove, NameSet * part_names_successed) +void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_to_remove, NameSet * part_names_succeed) { const auto settings = getSettings(); if (parts_to_remove.size() > 1 && settings->max_part_removal_threads > 1 && parts_to_remove.size() > settings->concurrent_part_removal_threshold) @@ -1884,10 +1918,10 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG(log, "Removing part from filesystem {}", part->name); part->remove(); - if (part_names_successed) + if (part_names_succeed) { std::lock_guard lock(part_names_mutex); - part_names_successed->insert(part->name); + part_names_succeed->insert(part->name); } }); } @@ -1900,13 +1934,13 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t { LOG_DEBUG(log, "Removing part from filesystem {}", part->name); part->remove(); - if (part_names_successed) - part_names_successed->insert(part->name); + if (part_names_succeed) + part_names_succeed->insert(part->name); } } } -size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirecory() +size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() { /** * Remove old (configured by setting) broken detached parts. @@ -2059,7 +2093,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ { /// Relies on storage path, so we drop it during rename - /// it will be recreated automatiaclly. + /// it will be recreated automatically. 
std::lock_guard wal_lock(write_ahead_log_mutex); if (write_ahead_log) { @@ -3894,7 +3928,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); if (parts.empty()) - throw Exception("Nothing to move (сheck that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception("Nothing to move (check that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART); std::erase_if(parts, [&](auto part_ptr) { @@ -5555,6 +5589,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (select_query->interpolate() && !select_query->interpolate()->children.empty()) return std::nullopt; + // Currently projections don't support GROUPING SET yet. + if (select_query->group_by_with_grouping_sets) + return std::nullopt; + auto query_options = SelectQueryOptions( QueryProcessingStage::WithMergeableState, /* depth */ 1, @@ -6248,7 +6286,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( { // Store metadata for replicated table. - // Do nothing for non-replocated. + // Do nothing for non-replicated. createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->data_part_storage->getPartDirectory()); }; @@ -6561,7 +6599,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge auto disk = moving_part.reserved_space->getDisk(); if (supportsReplication() && disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) { - /// If we acuqired lock than let's try to move. After one + /// If we acquired lock than let's try to move. After one /// replica will actually move the part from disk to some /// zero-copy storage other replicas will just fetch /// metainformation. 
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 68ec191412b..c91c7ba02a8 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -177,7 +177,7 @@ public: /// Rename map new_name -> old_name std::unordered_map rename_map; - bool isColumnRenamed(const String & new_name) const { return rename_map.count(new_name) > 0; } + bool isColumnRenamed(const String & new_name) const { return rename_map.contains(new_name); } String getColumnOldName(const String & new_name) const { return rename_map.at(new_name); } }; @@ -634,7 +634,7 @@ public: /// Delete WAL files containing parts, that all already stored on disk. size_t clearOldWriteAheadLogs(); - size_t clearOldBrokenPartsFromDetachedDirecory(); + size_t clearOldBrokenPartsFromDetachedDirectory(); /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. @@ -761,7 +761,7 @@ public: const ColumnsDescription & getObjectColumns() const { return object_columns; } - /// Creates desciprion of columns of data type Object from the range of data parts. + /// Creates description of columns of data type Object from the range of data parts. static ColumnsDescription getObjectColumns( const DataPartsVector & parts, const ColumnsDescription & storage_columns); @@ -1083,7 +1083,7 @@ protected: DataPartsIndexes::index::type & data_parts_by_info; DataPartsIndexes::index::type & data_parts_by_state_and_info; - /// Current descriprion of columns of data type Object. + /// Current description of columns of data type Object. /// It changes only when set of parts is changed and is /// protected by @data_parts_mutex. ColumnsDescription object_columns; @@ -1125,7 +1125,7 @@ protected: return {begin, end}; } - /// Creates desciprion of columns of data type Object from the range of data parts. 
+ /// Creates description of columns of data type Object from the range of data parts. static ColumnsDescription getObjectColumns( boost::iterator_range range, const ColumnsDescription & storage_columns); @@ -1263,7 +1263,7 @@ private: void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const; /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes - /// in precommitted state and to transasction + /// in precommitted state and to transaction void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder); /// Low-level method for preparing parts for commit (in-memory). @@ -1339,8 +1339,7 @@ private: void loadDataPartsFromWAL( DataPartsVector & broken_parts_to_detach, DataPartsVector & duplicate_parts_to_remove, - MutableDataPartsVector & parts_from_wal, - DataPartsLock & part_lock); + MutableDataPartsVector & parts_from_wal); void resetObjectColumnsFromActiveParts(const DataPartsLock & lock); void updateObjectColumns(const DataPartPtr & part, const DataPartsLock & lock); @@ -1352,7 +1351,7 @@ private: /// Remove parts from disk calling part->remove(). Can do it in parallel in case of big set of parts and enabled settings. /// If we fail to remove some part and throw_on_error equal to `true` will throw an exception on the first failed part. /// Otherwise, in non-parallel case will break and return. 
- void clearPartsFromFilesystemImpl(const DataPartsVector & parts, NameSet * part_names_successed); + void clearPartsFromFilesystemImpl(const DataPartsVector & parts, NameSet * part_names_succeed); TemporaryParts temporary_parts; }; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c024e5da7b5..709a8babcdd 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -313,6 +313,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression, + settings.max_block_size, only_merge); return std::make_pair(params, only_merge); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 9b79f89ff98..c8b3349734e 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,10 @@ void MergeTreeWriteAheadLog::rotate(const std::unique_lock &) init(); } -MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const StorageMetadataPtr & metadata_snapshot, ContextPtr context) +MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + std::unique_lock & parts_lock) { std::unique_lock lock(write_mutex); @@ -172,6 +176,9 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor part->uuid = metadata.part_uuid; block = block_in.read(); + + if (storage.getActiveContainingPart(part->info, MergeTreeDataPartState::Active, parts_lock)) + continue; } else { @@ -238,6 +245,15 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor 
std::copy_if(parts.begin(), parts.end(), std::back_inserter(result), [&dropped_parts](const auto & part) { return dropped_parts.count(part->name) == 0; }); + /// All parts in WAL had been already committed into the disk -> clear the WAL + if (result.empty()) + { + LOG_DEBUG(log, "WAL file '{}' had been completely processed. Removing.", path); + disk->removeFile(path); + init(); + return {}; + } + return result; } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index eb75d374cb1..b54161dbdaa 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -62,7 +62,10 @@ public: void addPart(DataPartInMemoryPtr & part); void dropPart(const String & part_name); - std::vector restore(const StorageMetadataPtr & metadata_snapshot, ContextPtr context); + std::vector restore( + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + std::unique_lock & parts_lock); using MinMaxBlockNumber = std::pair; static std::optional tryParseMinMaxBlockNumber(const String & filename); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index f3e33b6b38b..ba4979e57f2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -149,7 +149,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); storage.clearOldWriteAheadLogs(); if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - storage.clearOldBrokenPartsFromDetachedDirecory(); + storage.clearOldBrokenPartsFromDetachedDirectory(); storage.createNewZooKeeperNodes(); storage.syncPinnedPartUUIDs(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 
74e3d0881ff..cc983960847 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -66,7 +66,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() storage.clearOldWriteAheadLogs(); storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - storage.clearOldBrokenPartsFromDetachedDirecory(); + storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index c9ddd9147b9..0a435e558d2 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -166,14 +167,21 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, bool attach, ContextPtr context_, const String & primary_key_, - Int32 ttl_) + Int32 ttl_, + String rocksdb_dir_, + bool read_only_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , primary_key{primary_key_} + , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) + , read_only(read_only_) { setInMemoryMetadata(metadata_); - rocksdb_dir = context_->getPath() + relative_data_path_; + if (rocksdb_dir.empty()) + { + rocksdb_dir = context_->getPath() + relative_data_path_; + } if (!attach) { fs::create_directories(rocksdb_dir); @@ -269,7 +277,7 @@ void StorageEmbeddedRocksDB::initDB() if (ttl > 0) { rocksdb::DBWithTTL * db; - status = rocksdb::DBWithTTL::Open(merged, rocksdb_dir, &db, ttl); + status = rocksdb::DBWithTTL::Open(merged, rocksdb_dir, &db, ttl, read_only); if (!status.ok()) { throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", @@ -280,7 +288,14 
@@ void StorageEmbeddedRocksDB::initDB() else { rocksdb::DB * db; - status = rocksdb::DB::Open(merged, rocksdb_dir, &db); + if (read_only) + { + status = rocksdb::DB::OpenForReadOnly(merged, rocksdb_dir, &db); + } + else + { + status = rocksdb::DB::Open(merged, rocksdb_dir, &db); + } if (!status.ok()) { throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", @@ -351,15 +366,21 @@ static StoragePtr create(const StorageFactory::Arguments & args) { // TODO custom RocksDBSettings, table function auto engine_args = args.engine_args; - if (engine_args.size() > 1) + if (engine_args.size() > 3) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} requires at most 1 parameter. ({} given). Correct usage: EmbeddedRocksDB([ttl])", + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} requires at most 3 parameters. ({} given). Correct usage: EmbeddedRocksDB([ttl, rocksdb_dir, read_only])", args.engine_name, engine_args.size()); } Int32 ttl{0}; + String rocksdb_dir; + bool read_only{false}; if (!engine_args.empty()) ttl = checkAndGetLiteralArgument(engine_args[0], "ttl"); + if (engine_args.size() > 1) + rocksdb_dir = checkAndGetLiteralArgument(engine_args[1], "rocksdb_dir"); + if (engine_args.size() > 2) + read_only = checkAndGetLiteralArgument(engine_args[2], "read_only"); StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); @@ -374,7 +395,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS); } - return std::make_shared(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl); + return std::make_shared(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); } std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistics() const 
diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index ab87eac3e66..604976cd402 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -33,7 +33,9 @@ public: bool attach, ContextPtr context_, const String & primary_key_, - Int32 ttl_ = 0); + Int32 ttl_ = 0, + String rocksdb_dir_ = "", + bool read_only_ = false); std::string getName() const override { return "EmbeddedRocksDB"; } @@ -82,6 +84,7 @@ private: mutable std::shared_mutex rocksdb_ptr_mx; String rocksdb_dir; Int32 ttl; + bool read_only; void initDB(); }; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5ce46ecffc8..66e570fdc3b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1202,7 +1202,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); if (getSettings()->merge_tree_enable_clear_old_broken_detached) - cleared_count += clearOldBrokenPartsFromDetachedDirecory(); + cleared_count += clearOldBrokenPartsFromDetachedDirectory(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 762c3d52627..4be97e01293 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4582,7 +4582,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer if (entry.alter_version < metadata_version) { /// TODO Can we replace it with LOGICAL_ERROR? 
- /// As for now, it may rerely happen due to reordering of ALTER_METADATA entries in the queue of + /// As for now, it may rarely happen due to reordering of ALTER_METADATA entries in the queue of /// non-initial replica and also may happen after stale replica recovery. LOG_WARNING(log, "Attempt to update metadata of version {} " "to older version {} when processing log entry {}: {}", @@ -4664,7 +4664,7 @@ PartitionBlockNumbersHolder StorageReplicatedMergeTree::allocateBlockNumbersInAf } else { - /// TODO: Implement optimal block number aqcuisition algorithm in multiple (but not all) partitions + /// TODO: Implement optimal block number acquisition algorithm in multiple (but not all) partitions EphemeralLocksInAllPartitions lock_holder( fs::path(zookeeper_path) / "block_numbers", "block-", fs::path(zookeeper_path) / "temp", *zookeeper); @@ -4841,7 +4841,7 @@ void StorageReplicatedMergeTree::alter( Coordination::Responses results; Coordination::Error rc = zookeeper->tryMulti(ops, results); - /// For the sake of constitency with mechanics of concurrent background process of assigning parts merge tasks + /// For the sake of consistency with mechanics of concurrent background process of assigning parts merge tasks /// this placeholder must be held up until the moment of committing into ZK of the mutation entry /// See ReplicatedMergeTreeMergePredicate::canMergeTwoParts() method partition_block_numbers_holder.reset(); @@ -5897,7 +5897,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte /// partitions, saves them in the mutation entry and writes the mutation entry to a new ZK node in /// the /mutations folder. This block numbers are needed to determine which parts should be mutated and /// which shouldn't (parts inserted after the mutation will have the block number higher than the - /// block number acquired by the mutation in that partition and so will not be mutatied). 
+ /// block number acquired by the mutation in that partition and so will not be mutated). /// This block number is called "mutation version" in that partition. /// /// Mutation versions are acquired atomically in all partitions, so the case when an insert in some @@ -7217,7 +7217,7 @@ bool StorageReplicatedMergeTree::addOpsToDropAllPartsInPartition( } void StorageReplicatedMergeTree::dropAllPartsInPartitions( - zkutil::ZooKeeper & zookeeper, const Strings partition_ids, std::vector & entries, ContextPtr query_context, bool detach) + zkutil::ZooKeeper & zookeeper, const Strings & partition_ids, std::vector & entries, ContextPtr query_context, bool detach) { entries.reserve(partition_ids.size()); @@ -7600,7 +7600,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( if (!children.empty()) { - LOG_TRACE(logger, "Found {} ({}) zookeper locks for {}", zookeeper_part_uniq_node, children.size(), fmt::join(children, ", ")); + LOG_TRACE(logger, "Found {} ({}) zookeeper locks for {}", zookeeper_part_uniq_node, children.size(), fmt::join(children, ", ")); part_has_no_more_locks = false; continue; } @@ -7706,12 +7706,12 @@ String StorageReplicatedMergeTree::getSharedDataReplica( String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / id; Strings id_replicas; zookeeper->tryGetChildren(zookeeper_part_uniq_node, id_replicas); - LOG_TRACE(log, "Found zookeper replicas for {}: {}", zookeeper_part_uniq_node, id_replicas.size()); + LOG_TRACE(log, "Found zookeeper replicas for {}: {}", zookeeper_part_uniq_node, id_replicas.size()); replicas.insert(id_replicas.begin(), id_replicas.end()); } } - LOG_TRACE(log, "Found zookeper replicas for part {}: {}", part.name, replicas.size()); + LOG_TRACE(log, "Found zookeeper replicas for part {}: {}", part.name, replicas.size()); Strings active_replicas; @@ -7724,7 +7724,7 @@ String StorageReplicatedMergeTree::getSharedDataReplica( if ((replica != replica_name) && (zookeeper->exists(fs::path(zookeeper_path) / "replicas" / 
replica / "is_active"))) active_replicas.push_back(replica); - LOG_TRACE(log, "Found zookeper active replicas for part {}: {}", part.name, active_replicas.size()); + LOG_TRACE(log, "Found zookeeper active replicas for part {}: {}", part.name, active_replicas.size()); if (active_replicas.empty()) return ""; @@ -8159,7 +8159,7 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( if (!created) { - String mode_str = mode == zkutil::CreateMode::Persistent ? "persistent" : "ephemral"; + String mode_str = mode == zkutil::CreateMode::Persistent ? "persistent" : "ephemeral"; throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create {} zero copy lock {} because part was unlocked from zookeeper", mode_str, zookeeper_node); } } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 2e2a5ca79b7..79df4f11490 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -754,7 +754,7 @@ private: std::vector & delimiting_block_locks, std::vector & log_entry_ops_idx); void dropAllPartsInPartitions( - zkutil::ZooKeeper & zookeeper, const Strings partition_ids, std::vector & entries, ContextPtr query_context, bool detach); + zkutil::ZooKeeper & zookeeper, const Strings & partition_ids, std::vector & entries, ContextPtr query_context, bool detach); LogEntryPtr dropAllPartsInPartition( zkutil::ZooKeeper & zookeeper, const String & partition_id, ContextPtr query_context, bool detach); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 1685de55b6e..627679d6779 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1076,7 +1076,8 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects, - 
ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); + ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, + /* for_disk_s3 = */ false); client_configuration.endpointOverride = upd.uri.endpoint; client_configuration.maxConnections = upd.rw_settings.max_connections; diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index b419c4cfeed..4b9a87b93f1 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -69,13 +69,14 @@ public: virtual ~ITableFunction() = default; +protected: + virtual AccessType getSourceAccessType() const; + private: virtual StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; virtual const char * getStorageTypeName() const = 0; - - virtual AccessType getSourceAccessType() const; }; using TableFunctionPtr = std::shared_ptr; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index ed3000ec152..57f692eadad 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace DB { @@ -29,7 +31,10 @@ StoragePtr TableFunctionHDFS::getStorage( ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context) const { if (structure == "auto") + { + context->checkAccess(getSourceAccessType()); return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); + } return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 4a68fec1a5e..385d280a100 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,10 @@ 
void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, Conte ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr context) const { if (structure == "auto") + { + context->checkAccess(getSourceAccessType()); return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); + } return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e81b67d70a4..86a7e9a0eae 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,7 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) { if (configuration.structure == "auto") { + context->checkAccess(getSourceAccessType()); return StorageS3::getTableStructureFromData( configuration.format, S3::URI(Poco::URI(configuration.url)), diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index d76bd954d27..5c61207b717 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,7 @@ ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr co { if (configuration.structure == "auto") { + context->checkAccess(getSourceAccessType()); return StorageS3::getTableStructureFromData( configuration.format, S3::URI(Poco::URI(configuration.url)), diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index bbae0990062..99ec87c2e8f 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -113,12 +114,15 @@ ReadWriteBufferFromHTTP::HTTPHeaderEntries 
TableFunctionURL::getHeaders() const ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context) const { if (structure == "auto") + { + context->checkAccess(getSourceAccessType()); return StorageURL::getTableStructureFromData(format, filename, chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method), getHeaders(), std::nullopt, context); + } return parseColumnsListFromString(structure, context); } diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 3d0513bca47..69ca2e763b2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -247,6 +247,12 @@ CI_CONFIG = { "Stateless tests (release, s3 storage)": { "required_build": "package_release", }, + "Stateless tests (debug, s3 storage)": { + "required_build": "package_debug", + }, + "Stateless tests (tsan, s3 storage)": { + "required_build": "package_tsan", + }, "Stress test (asan)": { "required_build": "package_asan", }, diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 5e6542f6e4c..0a269c07642 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -20,8 +20,6 @@ from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS NAME = "Run Check" TRUSTED_ORG_IDS = { - 7409213, # yandex - 28471076, # altinity 54801242, # clickhouse } diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 29f3271a34c..39bd9cfb283 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -50,8 +50,6 @@ WorkflowDescription = namedtuple( # See https://api.github.com/orgs/{name} TRUSTED_ORG_IDS = { - 7409213, # yandex - 28471076, # altinity 54801242, # clickhouse } @@ -104,8 +102,6 @@ TRUSTED_CONTRIBUTORS = { "kreuzerkrieg", "lehasm", # DOCSUP "michon470", # DOCSUP - "MyroTk", # Tester in Altinity - "myrrc", # Michael Kot, Altinity "nikvas0", "nvartolomei", "olgarev", # DOCSUP diff --git a/tests/config/config.d/zookeeper_fault_injection.xml 
b/tests/config/config.d/zookeeper_fault_injection.xml new file mode 100644 index 00000000000..45d3cc8193d --- /dev/null +++ b/tests/config/config.d/zookeeper_fault_injection.xml @@ -0,0 +1,19 @@ + + + + localhost + 9181 + + + + 0.00002 + 0.00002 + + diff --git a/tests/config/install.sh b/tests/config/install.sh index e7d0f8e7acf..e27675b8abb 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -15,7 +15,6 @@ mkdir -p $DEST_SERVER_PATH/config.d/ mkdir -p $DEST_SERVER_PATH/users.d/ mkdir -p $DEST_CLIENT_PATH -ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_write.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ @@ -89,6 +88,12 @@ ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/ ln -sf --backup=simple --suffix=_original.xml \ $SRC_PATH/config.d/query_masking_rules.xml $DEST_SERVER_PATH/config.d/ +if [[ -n "$ZOOKEEPER_FAULT_INJECTION" ]] && [[ "$ZOOKEEPER_FAULT_INJECTION" -eq 1 ]]; then + ln -sf $SRC_PATH/config.d/zookeeper_fault_injection.xml $DEST_SERVER_PATH/config.d/ +else + ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ +fi + # We randomize creating the snapshot on exit for Keeper to test out using older snapshots create_snapshot_on_exit=$(($RANDOM % 2)) sed --follow-symlinks -i "s|true|$create_snapshot_on_exit|" $DEST_SERVER_PATH/config.d/keeper_port.xml diff --git a/tests/integration/runner b/tests/integration/runner index f0d87b23a83..e1b9a55b43e 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -350,8 +350,7 @@ if __name__ == "__main__": # randomizer, we should remove it after Sep 2022 try: subprocess.check_call( - "docker volume rm $(docker volume ls -q | " - f"grep '{VOLUME_NAME}_.*_volume')", + f"docker volume ls -q | grep '{VOLUME_NAME}_.*_volume' | xargs --no-run-if-empty docker volume rm", shell=True, ) except Exception as ex: 
diff --git a/tests/integration/test_join_set_family_s3/test.py b/tests/integration/test_join_set_family_s3/test.py index b09d5735628..38b56b7b15b 100644 --- a/tests/integration/test_join_set_family_s3/test.py +++ b/tests/integration/test_join_set_family_s3/test.py @@ -27,7 +27,7 @@ def cluster(): def assert_objects_count(cluster, objects_count, path="data/"): minio = cluster.minio_client - s3_objects = list(minio.list_objects(cluster.minio_bucket, path)) + s3_objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) if objects_count != len(s3_objects): for s3_object in s3_objects: object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 76ff0930db3..bed379d098b 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -25,7 +25,7 @@ def cluster(): def assert_objects_count(cluster, objects_count, path="data/"): minio = cluster.minio_client - s3_objects = list(minio.list_objects(cluster.minio_bucket, path)) + s3_objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) if objects_count != len(s3_objects): for s3_object in s3_objects: object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 544f064bdff..4276125c347 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -120,11 +120,17 @@ def run_s3_mocks(cluster): def wait_for_delete_s3_objects(cluster, expected, timeout=30): minio = cluster.minio_client while timeout > 0: - if len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == expected: + if ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == expected + ): return timeout -= 1 time.sleep(1) - assert 
len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == expected + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == expected + ) @pytest.fixture(autouse=True) @@ -140,7 +146,9 @@ def drop_table(cluster, node_name): wait_for_delete_s3_objects(cluster, 0) finally: # Remove extra objects to prevent tests cascade failing - for obj in list(minio.list_objects(cluster.minio_bucket, "data/")): + for obj in list( + minio.list_objects(cluster.minio_bucket, "data/", recursive=True) + ): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -162,7 +170,7 @@ def test_simple_insert_select( node.query("INSERT INTO s3_test VALUES {}".format(values1)) assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1 assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + files_per_part ) @@ -173,7 +181,7 @@ def test_simple_insert_select( == values1 + "," + values2 ) assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + files_per_part * 2 ) @@ -217,7 +225,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD ) @@ -306,28 +314,28 @@ def test_attach_detach_partition(cluster, node_name): ) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) 
node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE ) @@ -338,7 +346,8 @@ def test_attach_detach_partition(cluster, node_name): ) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == FILES_OVERHEAD + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == FILES_OVERHEAD ) @@ -356,21 +365,21 @@ def test_move_partition_to_another_disk(cluster, node_name): ) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + 
len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 's3'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -391,7 +400,7 @@ def test_table_manipulations(cluster, node_name): node.query("RENAME TABLE s3_test TO s3_renamed") assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("RENAME TABLE s3_renamed TO s3_test") @@ -402,14 +411,15 @@ def test_table_manipulations(cluster, node_name): node.query("ATTACH TABLE s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("TRUNCATE TABLE s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == FILES_OVERHEAD + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == FILES_OVERHEAD ) @@ -434,7 +444,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", 
recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) @@ -448,7 +458,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)" # Number of objects in S3 should be unchanged. assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 ) @@ -462,7 +472,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 ) @@ -483,14 +493,14 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" # Data should remain in S3 assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) node.query("ALTER TABLE s3_test FREEZE") # Number S3 objects should be unchanged. 
assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) @@ -499,7 +509,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4) - for obj in list(minio.list_objects(cluster.minio_bucket, "data/")): + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -520,7 +530,7 @@ def test_freeze_unfreeze(cluster, node_name): node.query("TRUNCATE TABLE s3_test") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -533,7 +543,8 @@ def test_freeze_unfreeze(cluster, node_name): # Data should be removed from S3. assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == FILES_OVERHEAD + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == FILES_OVERHEAD ) @@ -556,7 +567,7 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("TRUNCATE TABLE s3_test") node.query("DROP TABLE s3_test_removed NO DELAY") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -565,7 +576,8 @@ def test_freeze_system_unfreeze(cluster, node_name): # Data should be removed from S3. 
assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == FILES_OVERHEAD + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == FILES_OVERHEAD ) @@ -692,7 +704,7 @@ def test_lazy_seek_optimization_for_async_read(cluster, node_name): node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10") node.query("DROP TABLE IF EXISTS s3_test NO DELAY") minio = cluster.minio_client - for obj in list(minio.list_objects(cluster.minio_bucket, "data/")): + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) diff --git a/tests/integration/test_profile_events_s3/test.py b/tests/integration/test_profile_events_s3/test.py index aa578a1273a..18f1c5ee9ad 100644 --- a/tests/integration/test_profile_events_s3/test.py +++ b/tests/integration/test_profile_events_s3/test.py @@ -33,9 +33,10 @@ def cluster(): init_list = { "ReadBufferFromS3Bytes": 0, - "S3ReadMicroseconds": 0, "ReadBufferFromS3Microseconds": 0, "ReadBufferFromS3RequestsErrors": 0, + "WriteBufferFromS3Bytes": 0, + "S3ReadMicroseconds": 0, "S3ReadRequestsCount": 0, "S3ReadRequestsErrorsTotal": 0, "S3ReadRequestsErrors503": 0, @@ -45,14 +46,23 @@ init_list = { "S3WriteRequestsErrorsTotal": 0, "S3WriteRequestsErrors503": 0, "S3WriteRequestsRedirects": 0, - "WriteBufferFromS3Bytes": 0, + "DiskS3ReadMicroseconds": 0, + "DiskS3ReadRequestsCount": 0, + "DiskS3ReadRequestsErrorsTotal": 0, + "DiskS3ReadRequestsErrors503": 0, + "DiskS3ReadRequestsRedirects": 0, + "DiskS3WriteMicroseconds": 0, + "DiskS3WriteRequestsCount": 0, + "DiskS3WriteRequestsErrorsTotal": 0, + "DiskS3WriteRequestsErrors503": 0, + "DiskS3WriteRequestsRedirects": 0, } def get_s3_events(instance): result = init_list.copy() events = instance.query( - "SELECT event,value FROM system.events WHERE event LIKE '%S3%'" + "SELECT event, value FROM system.events WHERE event LIKE '%S3%'" ).split("\n") for event in 
events: ev = event.split("\t") @@ -75,20 +85,20 @@ def get_minio_stat(cluster): ) ).text.split("\n") for line in stat: - x = re.search("s3_requests_total(\{.*\})?\s(\d+)(\s.*)?", line) + x = re.search(r"s3_requests_total(\{.*\})?\s(\d+)(\s.*)?", line) if x != None: y = re.search('.*api="(get|list|head|select).*', x.group(1)) if y != None: result["get_requests"] += int(x.group(2)) else: result["set_requests"] += int(x.group(2)) - x = re.search("s3_errors_total(\{.*\})?\s(\d+)(\s.*)?", line) + x = re.search(r"s3_errors_total(\{.*\})?\s(\d+)(\s.*)?", line) if x != None: result["errors"] += int(x.group(2)) - x = re.search("s3_rx_bytes_total(\{.*\})?\s([\d\.e\+\-]+)(\s.*)?", line) + x = re.search(r"s3_rx_bytes_total(\{.*\})?\s([\d\.e\+\-]+)(\s.*)?", line) if x != None: result["tx_bytes"] += float(x.group(2)) - x = re.search("s3_tx_bytes_total(\{.*\})?\s([\d\.e\+\-]+)(\s.*)?", line) + x = re.search(r"s3_tx_bytes_total(\{.*\})?\s([\d\.e\+\-]+)(\s.*)?", line) if x != None: result["rx_bytes"] += float(x.group(2)) return result @@ -118,8 +128,10 @@ def get_query_stat(instance, hint): def get_minio_size(cluster): minio = cluster.minio_client size = 0 - for obj in minio.list_objects(cluster.minio_bucket, "data/"): - size += obj.size + for obj_level1 in minio.list_objects( + cluster.minio_bucket, prefix="data/", recursive=True + ): + size += obj_level1.size return size @@ -135,7 +147,7 @@ def test_profile_events(cluster): metrics0 = get_s3_events(instance) minio0 = get_minio_stat(cluster) - query1 = "CREATE TABLE test_s3.test_s3 (key UInt32, value UInt32) ENGINE=MergeTree PRIMARY KEY key ORDER BY key SETTINGS storage_policy='s3'" + query1 = "CREATE TABLE test_s3.test_s3 (key UInt32, value UInt32) ENGINE=MergeTree PRIMARY KEY key ORDER BY key SETTINGS storage_policy = 's3'" instance.query(query1) size1 = get_minio_size(cluster) @@ -157,7 +169,7 @@ def test_profile_events(cluster): metrics1["WriteBufferFromS3Bytes"] - metrics0["WriteBufferFromS3Bytes"] == size1 ) - query2 = 
"INSERT INTO test_s3.test_s3 FORMAT Values" + query2 = "INSERT INTO test_s3.test_s3 VALUES" instance.query(query2 + " (1,1)") size2 = get_minio_size(cluster) @@ -172,9 +184,12 @@ def test_profile_events(cluster): metrics2["S3WriteRequestsCount"] - metrics1["S3WriteRequestsCount"] == minio2["set_requests"] - minio1["set_requests"] ) + stat2 = get_query_stat(instance, query2) + for metric in stat2: assert stat2[metric] == metrics2[metric] - metrics1[metric] + assert ( metrics2["WriteBufferFromS3Bytes"] - metrics1["WriteBufferFromS3Bytes"] == size2 - size1 @@ -195,6 +210,7 @@ def test_profile_events(cluster): == minio3["set_requests"] - minio2["set_requests"] ) stat3 = get_query_stat(instance, query3) + # With async reads profile events are not updated fully because reads are done in a separate thread. # for metric in stat3: # print(metric) diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index 37027d07969..0d978bb6967 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -113,7 +113,7 @@ def drop_table(cluster): minio = cluster.minio_client # Remove extra objects to prevent tests cascade failing - for obj in list(minio.list_objects(cluster.minio_bucket, "data/")): + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -130,9 +130,9 @@ def test_insert_select_replicated(cluster, min_rows_for_wide_part, files_per_par insert(cluster, node_idxs=[1, 2, 3], verify=True) minio = cluster.minio_client - assert len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == 3 * ( - FILES_OVERHEAD + files_per_part * 3 - ) + assert len( + list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + ) == 3 * (FILES_OVERHEAD + files_per_part * 3) def test_drop_cache_on_cluster(cluster): diff --git 
a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index 73b611ad169..60a1b9b9746 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -87,7 +87,7 @@ def drop_table(cluster): minio = cluster.minio_client # Remove extra objects to prevent tests cascade failing - for obj in list(minio.list_objects(cluster.minio_bucket, "data/")): + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -124,6 +124,6 @@ def test_insert_select_replicated(cluster, min_rows_for_wide_part, files_per_par ) minio = cluster.minio_client - assert len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == ( - 3 * FILES_OVERHEAD - ) + (files_per_part * 3) + assert len( + list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + ) == (3 * FILES_OVERHEAD) + (files_per_part * 3) diff --git a/tests/integration/test_rocksdb_options/test.py b/tests/integration/test_rocksdb_options/test.py index a9e12eae4fd..c746d4d0042 100644 --- a/tests/integration/test_rocksdb_options/test.py +++ b/tests/integration/test_rocksdb_options/test.py @@ -42,6 +42,18 @@ def test_valid_options(start_cluster): DROP TABLE test; """ ) + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only') PRIMARY KEY(key); + DROP TABLE test; + """ + ) + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(10, '/var/lib/clickhouse/store/test_rocksdb_read_only', 1) PRIMARY KEY(key); + DROP TABLE test; + """ + ) def test_invalid_options(start_cluster): diff --git a/tests/queries/0_stateless/01594_too_low_memory_limits.reference b/tests/integration/test_rocksdb_read_only/__init__.py similarity index 100% rename from 
tests/queries/0_stateless/01594_too_low_memory_limits.reference rename to tests/integration/test_rocksdb_read_only/__init__.py diff --git a/tests/integration/test_rocksdb_read_only/configs/rocksdb.xml b/tests/integration/test_rocksdb_read_only/configs/rocksdb.xml new file mode 100644 index 00000000000..c0ac49576fc --- /dev/null +++ b/tests/integration/test_rocksdb_read_only/configs/rocksdb.xml @@ -0,0 +1,22 @@ + + + + + 8 + + + 2 + + + + test + + 10000 + + + 14 + +
+
+
+
diff --git a/tests/integration/test_rocksdb_read_only/test.py b/tests/integration/test_rocksdb_read_only/test.py new file mode 100644 index 00000000000..dcbfa417bff --- /dev/null +++ b/tests/integration/test_rocksdb_read_only/test.py @@ -0,0 +1,137 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", main_configs=["configs/rocksdb.xml"], stay_alive=True +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_read_only(start_cluster): + # fail if read_only = true and directory does not exist. + with pytest.raises(QueryRuntimeException): + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only', 1) PRIMARY KEY(key); + """ + ) + # create directory if read_only = false + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only') PRIMARY KEY(key); + INSERT INTO test (key, value) VALUES (0, 'a'), (1, 'b'), (2, 'c'); + """ + ) + # fail if create multiple non-read-only tables on the same directory + with pytest.raises(QueryRuntimeException): + node.query( + """ + CREATE TABLE test_fail (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only') PRIMARY KEY(key); + """ + ) + with pytest.raises(QueryRuntimeException): + node.query( + """ + CREATE TABLE test_fail (key UInt64, value String) Engine=EmbeddedRocksDB(10, '/var/lib/clickhouse/store/test_rocksdb_read_only') PRIMARY KEY(key); + """ + ) + # success if create multiple read-only tables on the same directory + node.query( + """ + CREATE TABLE test_1 (key UInt64, 
value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only', 1) PRIMARY KEY(key); + DROP TABLE test_1; + """ + ) + node.query( + """ + CREATE TABLE test_2 (key UInt64, value String) Engine=EmbeddedRocksDB(10, '/var/lib/clickhouse/store/test_rocksdb_read_only', 1) PRIMARY KEY(key); + DROP TABLE test_2; + """ + ) + # success if create table on existing directory with no other tables on it + node.query( + """ + DROP TABLE test; + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(10, '/var/lib/clickhouse/store/test_rocksdb_read_only', 1) PRIMARY KEY(key); + """ + ) + result = node.query("""SELECT count() FROM test;""") + assert result.strip() == "3" + # fail if insert into table with read_only = true + with pytest.raises(QueryRuntimeException): + node.query( + """INSERT INTO test (key, value) VALUES (4, 'd'); + """ + ) + node.query( + """ + DROP TABLE test; + """ + ) + + +def test_dirctory_missing_after_stop(start_cluster): + # for read_only = false + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only_missing') PRIMARY KEY(key); + """ + ) + node.stop_clickhouse() + node.exec_in_container( + [ + "bash", + "-c", + "rm -r /var/lib/clickhouse/store/test_rocksdb_read_only_missing", + ] + ) + node.start_clickhouse() + result = node.query( + """INSERT INTO test (key, value) VALUES (0, 'a'); + SELECT * FROM test; + """ + ) + assert result.strip() == "0\ta" + node.query( + """DROP TABLE test; + """ + ) + # for read_only = true + node.query( + """ + CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB(0, '/var/lib/clickhouse/store/test_rocksdb_read_only_missing', 1) PRIMARY KEY(key); + """ + ) + node.stop_clickhouse() + node.exec_in_container( + [ + "bash", + "-c", + "rm -r /var/lib/clickhouse/store/test_rocksdb_read_only_missing", + ] + ) + node.start_clickhouse() + with pytest.raises(QueryRuntimeException): + 
node.query("""INSERT INTO test (key, value) VALUES (1, 'b');""") + result = node.query("""SELECT * FROM test;""") + assert result.strip() == "" + node.query( + """DROP TABLE test; + """ + ) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 7b7fb9d21ad..860b83d4ed1 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -39,7 +39,9 @@ def cluster(): def get_large_objects_count(cluster, size=100, folder="data"): minio = cluster.minio_client counter = 0 - for obj in minio.list_objects(cluster.minio_bucket, "{}/".format(folder)): + for obj in minio.list_objects( + cluster.minio_bucket, "{}/".format(folder), recursive=True + ): if obj.size is not None and obj.size >= size: counter = counter + 1 return counter diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 6ffb38bd8d7..5591e63400c 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -1,31 +1,26 @@ import pytest - from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV -uuids = [] +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", main_configs=["configs/conf.xml"], with_nginx=True +) -@pytest.fixture(scope="module") -def cluster(): +@pytest.fixture(scope="module", autouse=True) +def setup_node(): try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "node1", main_configs=["configs/conf.xml"], with_nginx=True - ) cluster.start() - - yield cluster - + node1.query( + "insert into table function url(url1) partition by column3 values (1, 2, 3), (3, 2, 1), (1, 3, 2)" + ) + yield finally: cluster.shutdown() -def test_partition_by(cluster): - node1 = cluster.instances["node1"] - - node1.query( - f"insert into table function url(url1) partition by column3 values (1, 2, 3), (3, 2, 1), (1, 
3, 2)" - ) +def test_partition_by(): result = node1.query( f"select * from url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')" ) @@ -38,3 +33,45 @@ def test_partition_by(cluster): f"select * from url('http://nginx:80/test_3', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')" ) assert result.strip() == "1\t2\t3" + + +def test_table_function_url_access_rights(): + node1.query("CREATE USER OR REPLACE u1") + + expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + assert expected_error in node1.query_and_get_error( + f"SELECT * FROM url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) + + expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + assert expected_error in node1.query_and_get_error( + f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" + ) + + assert node1.query( + f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + + assert node1.query( + f"DESCRIBE TABLE url('http://nginx:80/not-exist', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", + user="u1", + ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) + + expected_error = "necessary to have grant URL ON *.*" + assert expected_error in node1.query_and_get_error( + f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV')", user="u1" + ) + + node1.query("GRANT URL ON *.* TO u1") + assert node1.query( + f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV')", + user="u1", + ) == TSV( + [ + ["c1", "Nullable(Int64)"], + ["c2", "Nullable(Int64)"], + ["c3", "Nullable(Int64)"], + ] + ) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 49d7ab4f2fc..99978cbf6dc 100644 --- a/tests/integration/test_ttl_move/test.py +++ 
b/tests/integration/test_ttl_move/test.py @@ -1284,19 +1284,6 @@ def test_materialize_ttl_in_partition(started_cluster, name, engine): def test_alter_multiple_ttls(started_cluster, name, engine, positive): name = unique_table_name(name) - """Copyright 2019, Altinity LTD - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.""" """Check that when multiple TTL expressions are set and before any parts are inserted the TTL expressions are changed with ALTER command then all old @@ -1664,16 +1651,6 @@ def test_double_move_while_select(started_cluster, name, positive): def test_alter_with_merge_work(started_cluster, name, engine, positive): name = unique_table_name(name) - """Copyright 2019, Altinity LTD -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.""" """Check that TTL expressions are re-evaluated for existing parts after ALTER command changes TTL expressions and parts are merged. 
diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index bcdb2d25912..cacd9ef0c78 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -342,16 +342,6 @@ def optimize_with_retry(node, table_name, retry=20): ], ) def test_ttl_alter_delete(started_cluster, name, engine): - """Copyright 2019, Altinity LTD - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.""" """Check compatibility with old TTL delete expressions to make sure that: * alter modify of column's TTL delete expression works diff --git a/tests/performance/lz4.xml b/tests/performance/lz4.xml new file mode 100644 index 00000000000..c5cf2772121 --- /dev/null +++ b/tests/performance/lz4.xml @@ -0,0 +1,22 @@ + + create table t_lz4(a UInt64) engine=MergeTree order by tuple() + create table t_lz4_norm(a UInt64) engine=MergeTree order by tuple() + create table t_lz4_uncomp(a UInt32) engine=MergeTree order by a + + insert into t_lz4 select number % 100 from numbers_mt(5e7) order by rand() + optimize table t_lz4 final + + insert into t_lz4_norm select number from numbers_mt(5e7) order by rand() + optimize table t_lz4_norm final + + insert into t_lz4_uncomp select number from numbers_mt(5e7) + optimize table t_lz4_uncomp final + + select a from t_lz4 format Null + select a from t_lz4_norm format Null + select a from t_lz4_uncomp format Null + + drop table t_lz4 + drop table t_lz4_norm + drop table t_lz4_uncomp + diff --git 
a/tests/performance/lz4_hits_columns.xml b/tests/performance/lz4_hits_columns.xml new file mode 100644 index 00000000000..0b93e4bc4e8 --- /dev/null +++ b/tests/performance/lz4_hits_columns.xml @@ -0,0 +1,39 @@ + + + + column + + ClientIP + ClientTimeZone + CookieEnable + CounterClass + CounterID + EventDate + EventTime + GoodEvent + HitColor + JavaEnable + OpenerName + PageCharset + ParamCurrency + ParamPrice + Referer + RefererCategoryID + RefererHash + RegionID + SearchPhrase + SilverlightVersion4 + Title + TraficSourceID + URLCategoryID + UserAgent + UserAgentMinor + UserID + WatchID + WindowName + + + + + select {column} from hits_100m_single format Null + diff --git a/tests/performance/queries_over_aggregation.xml b/tests/performance/queries_over_aggregation.xml index 2a92ea26819..ceaed61c5bb 100644 --- a/tests/performance/queries_over_aggregation.xml +++ b/tests/performance/queries_over_aggregation.xml @@ -1,4 +1,8 @@ + select sipHash64(number) from numbers(1e7) group by number format Null + select * from (select * from numbers(1e7) group by number) group by number format Null + select * from (select * from numbers(1e7) group by number) order by number format Null + select * from (select * from numbers_mt(1e7) group by number) group by number format Null select * from (select * from numbers_mt(1e7) group by number) order by number format Null select * from (select * from numbers_mt(1e7) group by number) group by number format Null settings max_bytes_before_external_group_by = 1 diff --git a/tests/queries/0_stateless/00284_external_aggregation.sql b/tests/queries/0_stateless/00284_external_aggregation.sql index a42dd91b6a5..d19f9f5aee8 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.sql +++ b/tests/queries/0_stateless/00284_external_aggregation.sql @@ -8,6 +8,7 @@ SET group_by_two_level_threshold_bytes = 50000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); 
SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); +SET max_memory_usage = 0; SET group_by_two_level_threshold = 100000; SET max_bytes_before_external_group_by = '1Mi'; diff --git a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql index 0bc5fcd1db8..9a439180265 100644 --- a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql +++ b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql @@ -1,4 +1,5 @@ SET joined_subquery_requires_alias = 0; +SET max_threads = 1; -- incremental streaming usecase -- that has sense only if data filling order has guarantees of chronological order diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 06d6ef6a94b..26c2bf133ac 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -111,5 +111,9 @@ for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_$i'" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_$i' and (type = 'ALTER_METADATA' or type = 'MUTATE_PART')" + + $CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_alter_add_drop_$i" + $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_add_drop_$i" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_add_drop_$i" done diff --git a/tests/queries/0_stateless/01091_num_threads.sql 
b/tests/queries/0_stateless/01091_num_threads.sql index faeceb0e6d6..0d2a66a8c2e 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -28,7 +28,7 @@ WITH ORDER BY event_time DESC LIMIT 1 ) AS id -SELECT uniqExact(thread_id) +SELECT uniqExact(thread_id) > 2 FROM system.query_thread_log WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index 2f4164ee0d1..b48958a18f6 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted @@ -32,7 +32,7 @@ function execute_group_by() # max_memory_usage_for_user is installed to 0 once there are no more # queries for user. 
local opts=( - "--max_memory_usage_for_user="$((150<<20)) + "--max_memory_usage_for_user="$((200<<20)) "--max_threads=2" ) execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(1e6) GROUP BY number % 5e5' diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference index a3f2106cd5f..540137d4887 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference @@ -6,4 +6,4 @@ 2020-01-01 00:00:00 2 1 499999 -5 +18 diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index a5423d1a3ff..23678c1abd9 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; -SET max_threads = 0; +SET max_threads = 16; CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); diff --git a/tests/queries/0_stateless/01594_too_low_memory_limits.config.xml b/tests/queries/0_stateless/01594_too_low_memory_limits.config.xml deleted file mode 100644 index 0c286bfbd21..00000000000 --- a/tests/queries/0_stateless/01594_too_low_memory_limits.config.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - trace - true - - - 9000 - - ./ - - 0 - - - - - - - ::/0 - - - default - default - 1 - - - - - - - - - - - diff --git a/tests/queries/0_stateless/01594_too_low_memory_limits.sh b/tests/queries/0_stateless/01594_too_low_memory_limits.sh deleted file mode 100755 index b513a947bd9..00000000000 --- 
a/tests/queries/0_stateless/01594_too_low_memory_limits.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-fasttest -# Tag no-tsan: requires jemalloc to track small allocations -# Tag no-asan: requires jemalloc to track small allocations -# Tag no-ubsan: requires jemalloc to track small allocations -# Tag no-msan: requires jemalloc to track small allocations - -# -# Regression for INSERT SELECT, that abnormally terminates the server -# in case of too small memory limits. -# -# NOTE: After #24483 had been merged the only place where the allocation may -# fail is the insert into PODArray in DB::OwnSplitChannel::log, but after -# #24069 those errors will be ignored, so to check new behaviour separate -# server is required. -# - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -server_opts=( - "--config-file=$CURDIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml" - "--" - # to avoid multiple listen sockets (complexity for port discovering) - "--listen_host=127.1" - # we will discover the real port later. - "--tcp_port=0" - "--shutdown_wait_unfinished=0" -) -CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" >clickhouse-server.log 2>clickhouse-server.stderr & -server_pid=$! - -trap cleanup EXIT -function cleanup() -{ - kill -9 $server_pid - - echo "Test failed. Server log:" - cat clickhouse-server.log - cat clickhouse-server.stderr - rm -f clickhouse-server.log - rm -f clickhouse-server.stderr - - exit 1 -} - -server_port= -i=0 retries=300 -# wait until server will start to listen (max 30 seconds) -while [[ -z $server_port ]] && [[ $i -lt $retries ]]; do - server_port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p $server_pid 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') - ((++i)) - sleep 0.1 - if ! 
kill -0 $server_pid >& /dev/null; then - echo "No server (pid $server_pid)" - break - fi -done -if [[ -z $server_port ]]; then - echo "Cannot wait for LISTEN socket" >&2 - exit 1 -fi - -# wait for the server to start accepting tcp connections (max 30 seconds) -i=0 retries=300 -while ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do - sleep 0.1 - if ! kill -0 $server_pid >& /dev/null; then - echo "No server (pid $server_pid)" - break - fi -done -if ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null -q 'select 1'; then - echo "Cannot wait until server will start accepting connections on " >&2 - exit 1 -fi - -# it is not mandatory to use existing table since it fails earlier, hence just a placeholder. -# this is format of INSERT SELECT, that pass these settings exactly for INSERT query not the SELECT -if $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null --send_logs_level=warning --max_memory_usage=1 --max_untracked_memory=1 -q 'insert into placeholder_table_name select * from numbers_mt(65535)' >& /dev/null; then - echo "INSERT SELECT should fail" >&2 - exit 1 -fi - -# no sleep, since flushing to stderr should not be buffered. -if ! grep -E -q 'Cannot add message to the log: Code: 60.*placeholder_table_name' clickhouse-server.stderr; then - echo "Adding message to the log should fail" >&2 - exit 1 -fi - -# check that server is still alive -$CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null -q 'SELECT 1' - -# send TERM and save the error code to ensure that it is 0 (EXIT_SUCCESS) -kill $server_pid -wait $server_pid -return_code=$? 
- -trap '' EXIT -if [ $return_code != 0 ]; then - cat clickhouse-server.log - cat clickhouse-server.stderr -fi -rm -f clickhouse-server.log -rm -f clickhouse-server.stderr - -exit $return_code diff --git a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.reference b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.reference index 161f4a6372f..8c3288df670 100644 --- a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.reference +++ b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.reference @@ -1,27 +1,12 @@ 1 50 50 1 0 49 1 50 50 1 0 49 1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 50 1 0 49 1 50 50 1 0 49 1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 51 0 1 51 1 50 50 1 0 49 1 50 50 1 0 49 1 50 50 1 0 49 -1 50 51 0 1 51 1 50 50 1 0 49 -1 50 51 0 1 51 1 50 50 1 0 49 1 50 50 1 0 49 1 50 50 1 0 49 @@ -29,32 +14,47 @@ 1 50 50 1 0 49 1 50 50 1 0 49 1 50 50 1 0 49 -1 50 51 0 1 51 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 +1 50 50 1 0 49 1 50 50 1 0 49 1 50 51 0 1 51 -1 50 50 1 0 49 1 50 51 0 1 51 1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 1 50 51 0 1 51 1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 50 1 0 49 1 50 51 0 1 51 1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 1 50 51 0 1 51 -1 50 50 1 0 49 -1 50 50 1 0 49 -1 50 50 1 0 49 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 
50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 +1 50 51 0 1 51 1 50 51 0 1 51 diff --git a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql index d70665655ca..a04f40058fd 100644 --- a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql +++ b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql @@ -52,6 +52,7 @@ ALL LEFT JOIN FROM group_bitmap_data_test WHERE pickup_date = '2019-01-01' GROUP BY city_id -) AS js2 USING (city_id); +) AS js2 USING (city_id) +ORDER BY today_users, before_users, ll_users, old_users, new_users, diff_users; DROP TABLE IF EXISTS group_bitmap_data_test; diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.reference new file mode 100644 index 00000000000..b233507ce6d --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.reference @@ -0,0 +1,28 @@ +a 2 +a x 1 +a y 1 +b 2 +b x 1 +b y 1 + 4 +a 2 +a x 1 +a y 1 +b 2 +b x 1 +b y 1 + 4 + x 2 + y 2 +a 2 +a x 1 +a y 1 +b 2 +b x 1 +b y 1 +a x 1 +a y 1 +b x 1 +b y 1 + + 4 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.sql new file mode 100644 index 00000000000..652ce786b5d --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_grouping_set.sql @@ -0,0 +1,15 @@ +drop table if exists test; + +create table test(dim1 String, dim2 String, projection p1 (select dim1, dim2, count() group by dim1, dim2)) engine MergeTree order by dim1; + +insert into test values ('a', 'x') ('a', 'y') ('b', 'x') ('b', 'y'); + +select dim1, dim2, count() from test group by grouping sets ((dim1, dim2), dim1) order by dim1, dim2, count(); + +select dim1, dim2, count() from test group by dim1, dim2 with rollup 
order by dim1, dim2, count(); + +select dim1, dim2, count() from test group by dim1, dim2 with cube order by dim1, dim2, count(); + +select dim1, dim2, count() from test group by dim1, dim2 with totals order by dim1, dim2, count(); + +drop table test; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 62b578c21d6..3d6a25fe799 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. -select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; -- { serverError 241 } +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi', max_block_size=1e12; -- { serverError 241 } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.reference b/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.reference new file mode 100644 index 00000000000..ff12af23d43 --- /dev/null +++ b/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.reference @@ -0,0 +1,18 @@ +uniqTheta union test +0 0 0 +4 2 3 +4 3 2 +uniqTheta intersect test +0 0 0 +1 2 3 +1 3 2 +uniqTheta union test +0 0 0 +1 
2 3 +2 3 2 +uniqTheta retention test +4 9 4 +uniqTheta retention with AggregatingMergeTree test +0.5 2 4 +uniqTheta retention with MergeTree test +0.5 2 4 diff --git a/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.sql b/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.sql new file mode 100644 index 00000000000..ccaf6ca6c6c --- /dev/null +++ b/tests/queries/0_stateless/01798_uniq_theta_union_intersect_not.sql @@ -0,0 +1,90 @@ +-- Tags: no-fasttest + +SELECT 'uniqTheta union test'; + +select finalizeAggregation(uniqThetaUnion(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[]) as a, arrayReduce('uniqThetaState',[]) as b ); + +select finalizeAggregation(uniqThetaUnion(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b ); + +select finalizeAggregation(uniqThetaUnion(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b ); + +SELECT 'uniqTheta intersect test'; + +select finalizeAggregation(uniqThetaIntersect(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[]) as a, arrayReduce('uniqThetaState',[]) as b ); + +select finalizeAggregation(uniqThetaIntersect(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b ); + +select finalizeAggregation(uniqThetaIntersect(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b ); + +SELECT 'uniqTheta union test'; + +select finalizeAggregation(uniqThetaNot(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[]) as a, arrayReduce('uniqThetaState',[]) as b ); + +select 
finalizeAggregation(uniqThetaNot(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[1,2]) as a, arrayReduce('uniqThetaState',[2,3,4]) as b ); + +select finalizeAggregation(uniqThetaNot(a, b)), finalizeAggregation(a), finalizeAggregation(b) from (select arrayReduce('uniqThetaState',[2,3,4]) as a, arrayReduce('uniqThetaState',[1,2]) as b ); + +SELECT 'uniqTheta retention test'; + +select finalizeAggregation(uniqThetaIntersect(a,b)), finalizeAggregation(a),finalizeAggregation(b) from +( +select (uniqThetaStateIf(number, number>0)) as a, (uniqThetaStateIf(number, number>5)) as b +from +(select number FROM system.numbers LIMIT 10) +); + +SELECT 'uniqTheta retention with AggregatingMergeTree test'; +DROP TABLE IF EXISTS test1; + +CREATE TABLE test1 +( + `year` String , + `uv` AggregateFunction(uniqTheta, Int64) +) +ENGINE = AggregatingMergeTree() +ORDER BY (year); + +INSERT INTO TABLE test1(year, uv) select '2021',uniqThetaState(toInt64(1)); +INSERT INTO TABLE test1(year, uv) select '2021',uniqThetaState(toInt64(2)); +INSERT INTO TABLE test1(year, uv) select '2021',uniqThetaState(toInt64(3)); +INSERT INTO TABLE test1(year, uv) select '2021',uniqThetaState(toInt64(4)); +INSERT INTO TABLE test1(year, uv) select '2022',uniqThetaState(toInt64(1)); +INSERT INTO TABLE test1(year, uv) select '2022',uniqThetaState(toInt64(3)); + +select finalizeAggregation(uniqThetaIntersect(uv2021,uv2022))/finalizeAggregation(uv2021),finalizeAggregation(uniqThetaIntersect(uv2021,uv2022)),finalizeAggregation(uv2021) +from +( +select uniqThetaMergeStateIf(uv,year='2021') as uv2021, uniqThetaMergeStateIf(uv,year='2022') as uv2022 +from test1 +); + +DROP TABLE IF EXISTS test1; + +SELECT 'uniqTheta retention with MergeTree test'; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test2 +( + `year` String , + `uv` Int64 +) +ENGINE = MergeTree() +ORDER BY (year); + +INSERT INTO TABLE test2(year, uv) select '2021',1; +INSERT INTO TABLE test2(year, uv) select 
'2021',2; +INSERT INTO TABLE test2(year, uv) select '2021',3; +INSERT INTO TABLE test2(year, uv) select '2021',4; +INSERT INTO TABLE test2(year, uv) select '2022',1; +INSERT INTO TABLE test2(year, uv) select '2022',3; + +select finalizeAggregation(uniqThetaIntersect(uv2021,uv2022))/finalizeAggregation(uv2021),finalizeAggregation(uniqThetaIntersect(uv2021,uv2022)),finalizeAggregation(uv2021) +from +( +select uniqThetaStateIf(uv,year='2021') as uv2021, uniqThetaStateIf(uv,year='2022') as uv2022 +from test2 +); + + + +DROP TABLE IF EXISTS test2; diff --git a/tests/integration/02044_exists_operator.reference b/tests/queries/0_stateless/02044_exists_operator.reference similarity index 100% rename from tests/integration/02044_exists_operator.reference rename to tests/queries/0_stateless/02044_exists_operator.reference diff --git a/tests/integration/02044_exists_operator.sql b/tests/queries/0_stateless/02044_exists_operator.sql similarity index 100% rename from tests/integration/02044_exists_operator.sql rename to tests/queries/0_stateless/02044_exists_operator.sql diff --git a/tests/queries/0_stateless/02277_full_sort_join_misc.sql b/tests/queries/0_stateless/02277_full_sort_join_misc.sql index b4e3882edaf..4297f532b98 100644 --- a/tests/queries/0_stateless/02277_full_sort_join_misc.sql +++ b/tests/queries/0_stateless/02277_full_sort_join_misc.sql @@ -1,24 +1,24 @@ SET join_algorithm = 'full_sorting_merge'; -SELECT * FROM (SELECT 1 as key) AS t1 JOIN (SELECT 1 as key) t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 1 as key) AS t1 JOIN (SELECT 1 as key) t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT 1 as key) AS t1 JOIN (SELECT 1 as key) t2 USING key; +SELECT * FROM (SELECT 1 as key) AS t1 JOIN (SELECT 1 as key) t2 USING key ORDER BY key; -SELECT * FROM (SELECT 1 :: UInt32 as key) AS t1 FULL JOIN (SELECT 1 :: Nullable(UInt32) as key) t2 USING (key); +SELECT * FROM (SELECT 1 :: UInt32 as key) AS t1 FULL JOIN (SELECT 1 :: Nullable(UInt32) as key) t2 USING (key) 
ORDER BY key; -SELECT * FROM (SELECT 1 :: UInt32 as key) AS t1 FULL JOIN (SELECT NULL :: Nullable(UInt32) as key) t2 USING (key); +SELECT * FROM (SELECT 1 :: UInt32 as key) AS t1 FULL JOIN (SELECT NULL :: Nullable(UInt32) as key) t2 USING (key) ORDER BY key; -SELECT * FROM (SELECT 1 :: Int32 as key) AS t1 JOIN (SELECT 1 :: UInt32 as key) t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 1 :: Int32 as key) AS t1 JOIN (SELECT 1 :: UInt32 as key) t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT -1 :: Nullable(Int32) as key) AS t1 FULL JOIN (SELECT 4294967295 :: UInt32 as key) t2 ON t1.key = t2.key; +SELECT * FROM (SELECT -1 :: Nullable(Int32) as key) AS t1 FULL JOIN (SELECT 4294967295 :: UInt32 as key) t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT 'a' :: LowCardinality(String) AS key) AS t1 JOIN (SELECT 'a' :: String AS key) AS t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 'a' :: LowCardinality(String) AS key) AS t1 JOIN (SELECT 'a' :: String AS key) AS t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT 'a' :: LowCardinality(Nullable(String)) AS key) AS t1 JOIN (SELECT 'a' :: String AS key) AS t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 'a' :: LowCardinality(Nullable(String)) AS key) AS t1 JOIN (SELECT 'a' :: String AS key) AS t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT 'a' :: LowCardinality(Nullable(String)) AS key) AS t1 JOIN (SELECT 'a' :: Nullable(String) AS key) AS t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 'a' :: LowCardinality(Nullable(String)) AS key) AS t1 JOIN (SELECT 'a' :: Nullable(String) AS key) AS t2 ON t1.key = t2.key ORDER BY key; -SELECT * FROM (SELECT 'a' :: LowCardinality(String) AS key) AS t1 JOIN (SELECT 'a' :: LowCardinality(String) AS key) AS t2 ON t1.key = t2.key; +SELECT * FROM (SELECT 'a' :: LowCardinality(String) AS key) AS t1 JOIN (SELECT 'a' :: LowCardinality(String) AS key) AS t2 ON t1.key = t2.key ORDER BY key; -SELECT 5 == count() FROM (SELECT number as a from numbers(5)) as t1 LEFT JOIN 
(SELECT number as b from numbers(5) WHERE number > 100) as t2 ON t1.a = t2.b; -SELECT 5 == count() FROM (SELECT number as a from numbers(5) WHERE number > 100) as t1 RIGHT JOIN (SELECT number as b from numbers(5)) as t2 ON t1.a = t2.b; +SELECT 5 == count() FROM (SELECT number as a from numbers(5)) as t1 LEFT JOIN (SELECT number as b from numbers(5) WHERE number > 100) as t2 ON t1.a = t2.b ORDER BY 1; +SELECT 5 == count() FROM (SELECT number as a from numbers(5) WHERE number > 100) as t1 RIGHT JOIN (SELECT number as b from numbers(5)) as t2 ON t1.a = t2.b ORDER BY 1; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index 67bd9c414ba..ec9a394d05d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -1,5 +1,22 @@ -- { echoOn } +explain pipeline select * from (select * from numbers(1e8) group by number) group by number; +(Expression) +ExpressionTransform × 16 + (Aggregating) + Resize 16 → 16 + AggregatingTransform × 16 + StrictResize 16 → 16 + (Expression) + ExpressionTransform × 16 + (Aggregating) + Resize 1 → 16 + AggregatingTransform + (Expression) + ExpressionTransform + (ReadFromStorage) + Limit + Numbers 0 → 1 explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; (Expression) ExpressionTransform × 16 diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d259889b042..85e9fd1be1e 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,9 +1,12 @@ set max_threads = 16; set prefer_localhost_replica = 1; set optimize_aggregation_in_order = 0; +set max_block_size = 65505; -- { echoOn } +explain pipeline select * from (select * from numbers(1e8) group by number) group by number; + 
explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; diff --git a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.reference b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql new file mode 100644 index 00000000000..b4754c6d6fe --- /dev/null +++ b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql @@ -0,0 +1,9 @@ +SET max_block_size = 4213; + +SELECT DISTINCT (blockSize() <= 4213) +FROM +( + SELECT number + FROM numbers(100000) + GROUP BY number +); diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference index f46cdb6e5e3..1da5cd0b7b3 100644 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.reference @@ -7,3 +7,10 @@ UInt64 String DateTime Map(UUID, Array(Float32)) 13 str 2022-08-04 18:30:53 {'10':[11,12],'13':[14,15]} 1 1 +_CAST(42, \'Int64\') Int64 +_CAST([1, 2, 3], \'Array(UInt8)\') Array(UInt8) +_CAST(((\'abc\', 22), (\'def\', 33)), \'Map(String, UInt8)\') Map(String, UInt8) +_CAST([[4, 5, 6], [7], [8, 9]], \'Array(Array(UInt8))\') Array(Array(UInt8)) +_CAST(((10, [11, 12]), (13, [14, 15])), \'Map(UInt8, Array(UInt8))\') Map(UInt8, Array(UInt8)) +_CAST(((\'ghj\', ((\'klm\', [16, 17]))), (\'nop\', ((\'rst\', [18])))), \'Map(String, Map(String, Array(UInt8)))\') Map(String, Map(String, Array(UInt8))) +a Int8 diff --git 
a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh index 335af1bb6e6..e61dc337d2a 100755 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh @@ -68,13 +68,27 @@ $CLICKHOUSE_CLIENT -n -q "select {n: UInt8} -- { serverError 456 }" $CLICKHOUSE_CLIENT -n -q "set param_n = 12; set param_n = 13; select {n: UInt8}" -# but multiple different parameters could be defined within each session +# multiple different parameters could be defined within each session $CLICKHOUSE_CLIENT -n -q " set param_a = 13, param_b = 'str'; set param_c = '2022-08-04 18:30:53'; set param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; select {a: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}" + # empty parameter name is not allowed $CLICKHOUSE_CLIENT --param_="" -q "select 1" 2>&1 | grep -c 'Code: 36' $CLICKHOUSE_CLIENT -q "set param_ = ''" 2>&1 | grep -c 'Code: 36' + + +# parameters are also supported for DESCRIBE TABLE queries +$CLICKHOUSE_CLIENT \ + --param_id="42" \ + --param_arr="[1, 2, 3]" \ + --param_map="{'abc': 22, 'def': 33}" \ + --param_mul_arr="[[4, 5, 6], [7], [8, 9]]" \ + --param_map_arr="{10: [11, 12], 13: [14, 15]}" \ + --param_map_map_arr="{'ghj': {'klm': [16, 17]}, 'nop': {'rst': [18]}}" \ + -q "describe table(select {id: Int64}, {arr: Array(UInt8)}, {map: Map(String, UInt8)}, {mul_arr: Array(Array(UInt8))}, {map_arr: Map(UInt8, Array(UInt8))}, {map_map_arr: Map(String, Map(String, Array(UInt8)))})" + +$CLICKHOUSE_CLIENT --param_p=42 -q "describe table (select * from (select {p:Int8} as a group by a) order by a)" diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference index c32f227006c..1ad64150049 100644 --- 
a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference @@ -1,8 +1,8 @@ -- EXPLAIN PLAN sorting for MergeTree w/o sorting key -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a -Sorting (Stream): a ASC +Sorting (Global): a ASC Sorting (Sorting for ORDER BY) -Sorting (Stream): a ASC +Sorting (Global): a ASC Sorting (None) Sorting (None) -- disable optimization -> sorting order is NOT propagated from subquery -> full sort @@ -20,22 +20,22 @@ LimitsCheckingTransform PartialSortingTransform -- ExpressionStep preserves sort mode -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a -Sorting (Stream): a ASC +Sorting (Global): a ASC Sorting +Sorting (Global): a ASC +Sorting (Stream): a ASC Sorting (Stream): a ASC -Sorting (Port): a ASC -Sorting (Port): a ASC -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 Sorting (None) Sorting (Sorting for ORDER BY) -Sorting (Stream): plus(a, 1) ASC +Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- ExpressionStep breaks sort mode -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 -Sorting (Stream): plus(a, 1) ASC +Sorting (Global): plus(a, 1) ASC Sorting (Sorting for ORDER BY) -Sorting (Stream): plus(a, 1) ASC +Sorting (Global): plus(a, 1) ASC Sorting (None) Sorting (Chunk): a ASC -- FilterStep preserves sort mode @@ -62,28 +62,28 @@ Sorting (None) Sorting (Chunk): a ASC -- aliases break sorting order -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) 
ORDER BY a -Sorting (Stream): a ASC +Sorting (Global): a ASC Sorting (Sorting for ORDER BY) -Sorting (Stream): a ASC +Sorting (Global): a ASC Sorting (None) Sorting +Sorting (Global): a ASC +Sorting (Stream): a ASC Sorting (Stream): a ASC -Sorting (Port): a ASC -Sorting (Port): a ASC -- aliases DONT break sorting order -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) -Sorting (Stream): x ASC, y ASC +Sorting (Global): x ASC, y ASC Sorting (Sorting for ORDER BY) -Sorting (Stream): x ASC, y ASC +Sorting (Global): x ASC, y ASC Sorting (Chunk): a ASC, b ASC Sorting (Chunk): a ASC, b ASC -- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1) -- QUERY: set optimize_read_in_order=1;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 Sorting (None) Sorting (Sorting for ORDER BY) -Sorting (Stream): plus(a, 1) ASC +Sorting (Global): plus(a, 1) ASC Sorting (None) Sorting (Sorting for ORDER BY) -Sorting (Stream): plus(a, 1) ASC +Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC diff --git a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference new file mode 100644 index 00000000000..6727d83a6f4 --- /dev/null +++ b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference @@ -0,0 +1,35 @@ +-- { echo } + +DROP TABLE IF EXISTS in_memory; +CREATE TABLE in_memory (a UInt32) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000, min_bytes_for_wide_part = 10485760; +INSERT INTO in_memory VALUES (1); +INSERT INTO in_memory VALUES (2); +SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; 
+all_1_1_0 1 InMemory +all_2_2_0 1 InMemory +SELECT * FROM in_memory ORDER BY a; +1 +2 +-- no WAL remove since parts are still in use +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; +all_1_1_0 1 InMemory +all_2_2_0 1 InMemory +SELECT * FROM in_memory ORDER BY a; +1 +2 +-- WAL should be removed, since on disk part covers all parts in WAL +OPTIMIZE TABLE in_memory; +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; +all_1_2_1 1 Compact +-- check that the WAL will be reinitialized after remove +INSERT INTO in_memory VALUES (3); +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT * FROM in_memory ORDER BY a; +1 +2 +3 diff --git a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql new file mode 100644 index 00000000000..0228852a115 --- /dev/null +++ b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql @@ -0,0 +1,27 @@ +-- { echo } + +DROP TABLE IF EXISTS in_memory; + +CREATE TABLE in_memory (a UInt32) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000, min_bytes_for_wide_part = 10485760; +INSERT INTO in_memory VALUES (1); +INSERT INTO in_memory VALUES (2); +SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; +SELECT * FROM in_memory ORDER BY a; + +-- no WAL remove since parts are still in use +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; +SELECT * FROM in_memory ORDER BY a; + +-- WAL should be removed, since on disk part covers all parts in WAL +OPTIMIZE TABLE in_memory; +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT name, active, part_type FROM system.parts WHERE database = 
currentDatabase() AND table = 'in_memory'; + +-- check that the WAL will be reinitialized after remove +INSERT INTO in_memory VALUES (3); +DETACH TABLE in_memory; +ATTACH TABLE in_memory; +SELECT * FROM in_memory ORDER BY a; diff --git a/tests/queries/0_stateless/02416_rename_database_rbac.reference b/tests/queries/0_stateless/02416_rename_database_rbac.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02416_rename_database_rbac.sh b/tests/queries/0_stateless/02416_rename_database_rbac.sh new file mode 100755 index 00000000000..c319136d29c --- /dev/null +++ b/tests/queries/0_stateless/02416_rename_database_rbac.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS database_02416; +CREATE DATABASE database_02416; + +DROP USER IF EXISTS user_test_02416; +CREATE USER user_test_02416 IDENTIFIED WITH plaintext_password BY 'user_test_02416'; + +GRANT CREATE DATABASE ON *.* TO 'user_test_02416' WITH GRANT OPTION; +GRANT DROP DATABASE ON *.* TO 'user_test_02416' WITH GRANT OPTION; +REVOKE DROP DATABASE ON database_02416.* FROM 'user_test_02416'; +GRANT CREATE TABLE ON *.* TO 'user_test_02416' WITH GRANT OPTION; +GRANT DROP TABLE ON *.* TO 'user_test_02416' WITH GRANT OPTION; +""" +${CLICKHOUSE_CLIENT} --multiline --multiquery --user user_test_02416 --password user_test_02416 -q """ +RENAME DATABASE user_test_02416 to aaaaaaaaa; -- { serverError 497 } +""" diff --git a/tests/queries/0_stateless/02417_repeat_input_commands.expect b/tests/queries/0_stateless/02417_repeat_input_commands.expect new file mode 100755 index 00000000000..119aac68645 --- /dev/null +++ b/tests/queries/0_stateless/02417_repeat_input_commands.expect @@ -0,0 +1,81 @@ +#!/usr/bin/expect -f + +set basedir [file dirname $argv0] +set basename 
[file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + +log_user 0 +set timeout 10 +match_max 100000 + +expect_after { + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } +} + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" +expect ":) " + +# ----------------------------------------- +# test . and / commands prior to the first query + +send -- ".\r" +expect "Empty query" +expect ":) " + +send -- "/\r" +expect "Empty query" +expect ":) " + +# ----------------------------------------- +# test . and / commands after first query + +send -- "SELECT 123\r" +expect "│ 123 │" +expect "1 row in set." +expect ":) " + +send -- ".\r" +expect "│ 123 │" +expect "1 row in set." +expect ":) " + +# test input of . more than once in a row +send -- ".\r" +expect "│ 123 │" +expect "1 row in set." +expect ":) " + +send -- "/\r" +expect "│ 123 │" +expect "1 row in set." +expect ":) " + +# test input of / more than once in a row +send -- "/\r" +expect "│ 123 │" +expect "1 row in set." +expect ":) " + +# ----------------------------------------- +# test . and / commands after another query + +send -- "SELECT 321\r" +expect "│ 321 │" +expect "1 row in set." +expect ":) " + +send -- ".\r" +expect "│ 321 │" +expect "1 row in set." +expect ":) " + +send -- "/\r" +expect "│ 321 │" +expect "1 row in set." 
+expect ":) " + +send -- "quit\r" +expect eof diff --git a/tests/queries/0_stateless/02417_repeat_input_commands.reference b/tests/queries/0_stateless/02417_repeat_input_commands.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/1_stateful/00175_obfuscator_schema_inference.reference b/tests/queries/1_stateful/00175_obfuscator_schema_inference.reference new file mode 100644 index 00000000000..bd7f726bffd --- /dev/null +++ b/tests/queries/1_stateful/00175_obfuscator_schema_inference.reference @@ -0,0 +1,4 @@ +403489 +1000 320 171 23 +2500 597 332 14 +2500 597 332 14 diff --git a/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh new file mode 100755 index 00000000000..8ff0d2fa648 --- /dev/null +++ b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Compared to explicitly specifying the structure of the input, +# schema inference adds Nullable(T) to all types, so the model and the results +# are a bit different from test '00175_obfuscator_schema_inference.sh' + +$CLICKHOUSE_CLIENT --max_threads 1 --query="SELECT URL, Title, SearchPhrase FROM test.hits LIMIT 1000" > "${CLICKHOUSE_TMP}"/data.tsv + +# Test obfuscator without saving the model +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null + +# Test obfuscator with saving the model +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 0 --save "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null +wc -c < "${CLICKHOUSE_TMP}"/model.bin +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 --load "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv 2>/dev/null +rm "${CLICKHOUSE_TMP}"/model.bin + +$CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data.tsv +$CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500.tsv +$CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv + +rm "${CLICKHOUSE_TMP}"/data.tsv +rm "${CLICKHOUSE_TMP}"/data2500.tsv +rm "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv diff --git a/utils/keeper-bench/Runner.cpp 
b/utils/keeper-bench/Runner.cpp index b43ad62bc5b..2f3cf4b0620 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -203,16 +203,11 @@ std::vector> Runner::getConnections() Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{host_string}, false}; std::vector nodes; nodes.push_back(node); - zookeepers.emplace_back(std::make_shared( - nodes, - "", /*chroot*/ - "", /*identity type*/ - "", /*identity*/ - Poco::Timespan(0, 30000 * 1000), - Poco::Timespan(0, 1000 * 1000), - Poco::Timespan(0, 10000 * 1000), - nullptr)); - + zkutil::ZooKeeperArgs args; + args.session_timeout_ms = 30000; + args.connection_timeout_ms = 1000; + args.operation_timeout_ms = 10000; + zookeepers.emplace_back(std::make_shared(nodes, args, nullptr)); } diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index ad4cd5b2f63..c997526d38d 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -361,6 +361,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress #endif +#if !defined(OS_DARWIN) && !defined(OS_FREEBSD) + uint32_t getInode(const char * self) { std::ifstream maps("/proc/self/maps"); @@ -386,6 +388,8 @@ uint32_t getInode(const char * self) return 0; } +#endif + int main(int/* argc*/, char* argv[]) { char self[4096] = {0}; @@ -409,6 +413,7 @@ int main(int/* argc*/, char* argv[]) else name = file_path; +#if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// get inode of this executable uint32_t inode = getInode(self); if (inode == 0) @@ -460,6 +465,7 @@ int main(int/* argc*/, char* argv[]) printf("No target executable - decompression only was performed.\n"); return 0; } +#endif int input_fd = open(self, O_RDONLY); if (input_fd == -1) @@ -522,19 +528,21 @@ int main(int/* argc*/, char* argv[]) if (has_exec) { +#if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// write one byte to the lock in case other copies 
of compressed are running to indicate that /// execution should be performed write(lock, "1", 1); - +#endif execv(self, argv); /// This part of code will be reached only if error happened perror("execv"); return 1; } - +#if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// since inodes can be reused - it's a precaution if lock file already exists and have size of 1 ftruncate(lock, 0); +#endif printf("No target executable - decompression only was performed.\n"); } diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index 94fec5a8855..bfcdb0a90de 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -69,7 +69,7 @@ int main(int argc, char ** argv) Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); - zkutil::ZooKeeper zk(argv[1]); + zkutil::ZooKeeper zk{zkutil::ZooKeeperArgs(argv[1])}; LineReader lr({}, false, {"\\"}, {}); do