From 4e9d894e24041f7c462a486394f19d602403ae15 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Oct 2022 01:32:35 +0200 Subject: [PATCH 001/689] Compress marks and primary key by default --- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a0db39a97f1..210846b9bb2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -147,8 +147,8 @@ struct Settings; M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ \ /** Compress marks and primary key. */ \ - M(Bool, compress_marks, false, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ - M(Bool, compress_primary_key, false, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ + M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ + M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ M(String, marks_compression_codec, "ZSTD(3)", "Compression encoding used by marks, marks are small enough and cached, so the default compression is ZSTD(3).", 0) \ M(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \ M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \ From 462e7f76bf20b2872e3605a17080a97d1d6e8e33 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Dec 2022 13:28:25 +0100 Subject: [PATCH 002/689] Attempt to fix the "system.stack_trace" test --- src/Core/Settings.h | 1 + 
src/Storages/System/StorageSystemStackTrace.cpp | 6 +++++- tests/queries/0_stateless/01051_system_stack_trace.sql | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b10760c8277..f26d6a2e12a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -655,6 +655,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ \ M(String, workload, "default", "Name of workload to be used to access resources", 0) \ + M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index df3d8b74e6e..8f3f9744725 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -262,6 +263,9 @@ Pipe StorageSystemStackTrace::read( { storage_snapshot->check(column_names); + int pipe_read_timeout_ms = static_cast( + context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds()); + /// It shouldn't be possible to do concurrent reads from this table. std::lock_guard lock(mutex); @@ -334,7 +338,7 @@ Pipe StorageSystemStackTrace::read( } /// Just in case we will wait for pipe with timeout. In case signal didn't get processed. 
- if (send_signal && wait(100) && sig_value.sival_int == data_ready_num.load(std::memory_order_acquire)) + if (send_signal && wait(pipe_read_timeout_ms) && sig_value.sival_int == data_ready_num.load(std::memory_order_acquire)) { size_t stack_trace_size = stack_trace.getSize(); size_t stack_trace_offset = stack_trace.getOffset(); diff --git a/tests/queries/0_stateless/01051_system_stack_trace.sql b/tests/queries/0_stateless/01051_system_stack_trace.sql index e322462a46a..a7e6660d06e 100644 --- a/tests/queries/0_stateless/01051_system_stack_trace.sql +++ b/tests/queries/0_stateless/01051_system_stack_trace.sql @@ -1,4 +1,4 @@ --- Tags: race +SET storage_system_stack_trace_pipe_read_timeout_ms = 10000; -- { echo } SELECT count() > 0 FROM system.stack_trace WHERE query_id != ''; From d7f4bc4c72f238d4de1ac9dfda786d3a678c29c5 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 15 Mar 2023 17:01:19 +0100 Subject: [PATCH 003/689] ActionsDAG: remove wrong optimization Replacing AND with a casto to UInt8 can change the condition. e.g. 0x100 cast to UInt8 is 0, but it should be 1. 0.1 cast to UInt8 is 0, but it should be 1. Basically we can't cast anything to UInt8 so we completely remove this optimization. 
Closes #47317 --- src/Interpreters/ActionsDAG.cpp | 62 ++++++------------- .../02568_and_consistency.reference | 5 ++ .../0_stateless/02568_and_consistency.sql | 42 +++++++++++++ 3 files changed, 66 insertions(+), 43 deletions(-) create mode 100644 tests/queries/0_stateless/02568_and_consistency.reference create mode 100644 tests/queries/0_stateless/02568_and_consistency.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 46b5a93b28c..ac11862d375 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1966,8 +1966,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } auto conjunction = getConjunctionNodes(predicate, allowed_nodes); - if (conjunction.rejected.size() == 1 && WhichDataType{removeNullable(conjunction.rejected.front()->result_type)}.isFloat()) + if (conjunction.rejected.size() == 1 && !conjunction.rejected.front()->result_type->equals(*predicate->result_type) + && conjunction.allowed.front()->type == ActionType::COLUMN) + { + // No further optimization can be done return nullptr; + } auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) @@ -2017,55 +2021,26 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( else { /// Predicate is conjunction, where both allowed and rejected sets are not empty. - /// Replace this node to conjunction of rejected predicates. NodeRawConstPtrs new_children = std::move(conjunction.rejected); - if (new_children.size() == 1) + if (new_children.size() == 1 && new_children.front()->result_type->equals(*predicate->result_type)) { - /// Rejected set has only one predicate. - if (new_children.front()->result_type->equals(*predicate->result_type)) - { - /// If it's type is same, just add alias. 
- Node node; - node.type = ActionType::ALIAS; - node.result_name = predicate->result_name; - node.result_type = predicate->result_type; - node.children.swap(new_children); - *predicate = std::move(node); - } - else if (!WhichDataType{removeNullable(new_children.front()->result_type)}.isFloat()) - { - /// If type is different, cast column. - /// This case is possible, cause AND can use any numeric type as argument. - /// But casting floats to UInt8 or Bool produces different results. - /// so we can't apply this optimization to them. - Node node; - node.type = ActionType::COLUMN; - node.result_name = predicate->result_type->getName(); - node.column = DataTypeString().createColumnConst(0, node.result_name); - node.result_type = std::make_shared(); - - const auto * right_arg = &nodes.emplace_back(std::move(node)); - const auto * left_arg = new_children.front(); - - predicate->children = {left_arg, right_arg}; - auto arguments = prepareFunctionArguments(predicate->children); - - FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); - - predicate->function_base = func_builder_cast->build(arguments); - predicate->function = predicate->function_base->prepare(arguments); - } + /// Rejected set has only one predicate. And the type is the same as the result_type. + /// Just add alias. + Node node; + node.type = ActionType::ALIAS; + node.result_name = predicate->result_name; + node.result_type = predicate->result_type; + node.children.swap(new_children); + *predicate = std::move(node); } else { - /// Predicate is function AND, which still have more then one argument. - /// Or there is only one argument that is a float and we can't just - /// remove the AND. + /// Predicate is function AND, which still have more then one argument + /// or it has one argument of the wrong type. /// Just update children and rebuild it. 
- predicate->children.swap(new_children); - if (WhichDataType{removeNullable(predicate->children.front()->result_type)}.isFloat()) + if (new_children.size() == 1) { Node node; node.type = ActionType::COLUMN; @@ -2073,8 +2048,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( node.column = DataTypeUInt8().createColumnConst(0, 1u); node.result_type = std::make_shared(); const auto * const_col = &nodes.emplace_back(std::move(node)); - predicate->children.emplace_back(const_col); + new_children.emplace_back(const_col); } + predicate->children.swap(new_children); auto arguments = prepareFunctionArguments(predicate->children); FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); diff --git a/tests/queries/0_stateless/02568_and_consistency.reference b/tests/queries/0_stateless/02568_and_consistency.reference new file mode 100644 index 00000000000..07a8041d0ee --- /dev/null +++ b/tests/queries/0_stateless/02568_and_consistency.reference @@ -0,0 +1,5 @@ += +1554690688 += +1554690688 += diff --git a/tests/queries/0_stateless/02568_and_consistency.sql b/tests/queries/0_stateless/02568_and_consistency.sql new file mode 100644 index 00000000000..4e76da78427 --- /dev/null +++ b/tests/queries/0_stateless/02568_and_consistency.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (c0 Int32, PRIMARY KEY (c0)) ENGINE=MergeTree; +INSERT INTO t1 VALUES (1554690688); + +select '='; + +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * (-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING -(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL) +UNION ALL +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * (-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING NOT (-(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL)) +UNION ALL +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * 
(-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING (-(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL)) IS NULL +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; + +select '='; + +SELECT MIN(t1.c0) +FROM t1 +GROUP BY t1.c0 +HAVING and(MIN(t1.c0) + MIN(t1.c0), 1) +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; + +select '='; + +DROP TABLE IF EXISTS t1; From e7d19cc45f5e9b42d2b6b6a063835c6e9614e3c1 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 15 Mar 2023 17:15:22 +0100 Subject: [PATCH 004/689] Fix test that expected CH to apply a wrong optimization The result of minus is an int64. Casting that result to uint8 instead of bool can lead to wrong results e.g. 256 cast to uint8 is 0, but it should be `true` --- tests/queries/0_stateless/01655_plan_optimizations.reference | 2 +- tests/queries/0_stateless/01655_plan_optimizations.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index f870a52284c..48d99647b43 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -53,7 +53,7 @@ Filter column: notEquals(y, 0) 9 10 > one condition of filter should be pushed down after aggregating, other condition is casted Filter column -FUNCTION _CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4)) +FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2 Aggregating Filter column: notEquals(y, 0) 0 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index aaecdc390cb..ec856c9bf27 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ 
b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -56,7 +56,7 @@ $CLICKHOUSE_CLIENT -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION _CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y From 89a76c2d771473a8d99d4d0522961e787e829657 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Mar 2023 23:45:57 +0100 Subject: [PATCH 005/689] Use LLVM 16 --- .github/workflows/nightly.yml | 4 +-- docker/packager/packager | 19 +++++------ docker/test/fasttest/run.sh | 2 +- docker/test/fuzzer/run-fuzzer.sh | 2 +- docker/test/keeper-jepsen/run.sh | 2 +- docker/test/server-jepsen/run.sh | 2 +- docker/test/util/Dockerfile | 2 +- docs/en/development/build.md | 5 ++- docs/en/development/continuous-integration.md | 2 +- docs/en/development/developer-instruction.md | 2 +- tests/ci/ci_config.py | 34 +++++++++---------- 11 files changed, 37 insertions(+), 39 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index f6d6d192f48..b42052f91fe 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -123,8 +123,8 @@ jobs: SONAR_SCANNER_VERSION: 4.7.0.2747 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-15 - CXX: clang++-15 + CC: clang-16 + CXX: clang++-16 steps: - name: Check out repository code uses: ClickHouse/checkout@v1 diff --git a/docker/packager/packager b/docker/packager/packager 
index 58dd299fd6d..35d08231c26 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -341,17 +341,16 @@ if __name__ == "__main__": parser.add_argument( "--compiler", choices=( - "clang-15", - "clang-15-darwin", - "clang-15-darwin-aarch64", - "clang-15-aarch64", - "clang-15-aarch64-v80compat", - "clang-15-ppc64le", - "clang-15-amd64-compat", - "clang-15-freebsd", - "gcc-11", + "clang-16", + "clang-16-darwin", + "clang-16-darwin-aarch64", + "clang-16-aarch64", + "clang-16-aarch64-v80compat", + "clang-16-ppc64le", + "clang-16-amd64-compat", + "clang-16-freebsd", ), - default="clang-15", + default="clang-16", help="a compiler to use", ) parser.add_argument( diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 086276bed55..d3bc3487035 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-13} +export LLVM_VERSION=${LLVM_VERSION:-16} # A variable to pass additional flags to CMake. 
# Here we explicitly default it to nothing so that bash doesn't complain about diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 75f2a0af358..d2c8de7a211 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -15,7 +15,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function git_clone_with_retry diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 5e321b7c347..694d7fcd916 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 4a966d50f74..cea4da27866 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} 
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 0ee426f4e4d..9af2b8c1d2a 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list # 15.0.2 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16 RUN apt-get update \ && apt-get install \ diff --git a/docs/en/development/build.md b/docs/en/development/build.md index d52b018a5a7..40994214316 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -45,8 +45,8 @@ For other Linux distribution - check the availability of the [prebuild packages] #### Use the latest clang for Builds ``` bash -export CC=clang-15 -export CXX=clang++-15 +export CC=clang-16 +export CXX=clang++-16 ``` In this example we use version 15 that is the latest as of Sept 2022. @@ -159,4 +159,3 @@ The CI checks build the binaries on each commit to [ClickHouse](https://github.c 1. Find the type of package for your operating system that you need and download the files. ![build artifact check](images/find-build-artifact.png) - diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 232eee5b3cf..738c5458cc3 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. 
You have t ### Report Details -- **Compiler**: `clang-15`, optionally with the name of a target platform +- **Compiler**: `clang-16`, optionally with the name of a target platform - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Status**: `success` or `fail` diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ace5ab79bb4..536abcb7130 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -146,7 +146,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. 
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 2f35b337cb3..89fde058507 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -8,7 +8,7 @@ BuildConfig = Dict[str, ConfValue] CI_CONFIG = { "build_config": { "package_release": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -18,7 +18,7 @@ CI_CONFIG = { "with_coverage": False, }, "coverity": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "", "package_type": "coverity", @@ -27,7 +27,7 @@ CI_CONFIG = { "official": False, }, "package_aarch64": { - "compiler": "clang-15-aarch64", + "compiler": "clang-16-aarch64", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -37,7 +37,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_asan": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "address", "package_type": "deb", @@ -45,7 +45,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_ubsan": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "undefined", "package_type": "deb", @@ -53,7 +53,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_tsan": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "thread", "package_type": "deb", @@ -61,7 +61,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_msan": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "memory", "package_type": "deb", @@ -69,7 +69,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_debug": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "debug", "sanitizer": "", "package_type": "deb", @@ -77,7 +77,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_release": { - "compiler": "clang-15", + "compiler": "clang-16", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -85,7 +85,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_tidy": { - 
"compiler": "clang-15", + "compiler": "clang-16", "build_type": "debug", "sanitizer": "", "package_type": "binary", @@ -94,7 +94,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_darwin": { - "compiler": "clang-15-darwin", + "compiler": "clang-16-darwin", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -103,7 +103,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_aarch64": { - "compiler": "clang-15-aarch64", + "compiler": "clang-16-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -111,7 +111,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_aarch64_v80compat": { - "compiler": "clang-15-aarch64-v80compat", + "compiler": "clang-16-aarch64-v80compat", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -120,7 +120,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_freebsd": { - "compiler": "clang-15-freebsd", + "compiler": "clang-16-freebsd", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -129,7 +129,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_darwin_aarch64": { - "compiler": "clang-15-darwin-aarch64", + "compiler": "clang-16-darwin-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -138,7 +138,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_ppc64le": { - "compiler": "clang-15-ppc64le", + "compiler": "clang-16-ppc64le", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -147,7 +147,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_amd64_compat": { - "compiler": "clang-15-amd64-compat", + "compiler": "clang-16-amd64-compat", "build_type": "", "sanitizer": "", "package_type": "binary", From 0f7f6201003d317c5e8bf33ea8e6a8f6b5cc59fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 Mar 2023 01:07:11 +0100 Subject: [PATCH 006/689] Update one more place --- docker/test/codebrowser/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/codebrowser/Dockerfile 
b/docker/test/codebrowser/Dockerfile index b76b8234c81..804a665cfa8 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -31,7 +31,7 @@ RUN arch=${TARGETARCH:-amd64} \ arm64) rarch=aarch64 ;; \ *) exit 1 ;; \ esac \ - && ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1 + && ln -rsf /usr/lib/$rarch-linux-gnu/libclang-16.so.15 /usr/lib/$rarch-linux-gnu/libclang-16.so.1 # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls From 0f039f83590339ab636e9b2e59c25ed22227d246 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 09:09:57 +0100 Subject: [PATCH 007/689] Add Google Cloud Storage S3 compatible table function --- src/TableFunctions/TableFunctionS3.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index 859da9e9201..a2e93476448 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -93,4 +93,18 @@ private: } +class TableFunctionGCS : public TableFunctionS3 +{ +public: + static constexpr auto name = "gcs"; + std::string getName() const override + { + return name; + } +private: + const char * getStorageTypeName() const override { return "GCS"; } +}; + +} + #endif From e6ddfc3486985393040222aec24ea70a4c60e7b8 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 09:51:37 +0100 Subject: [PATCH 008/689] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 184 +++++++++++++++++++ src/TableFunctions/TableFunctionS3.h | 8 + 2 files changed, 192 insertions(+) create mode 100644 docs/en/sql-reference/table-functions/gcs.md diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md new file mode 100644 index 00000000000..8427a2db224 --- /dev/null +++ b/docs/en/sql-reference/table-functions/gcs.md @@ 
-0,0 +1,184 @@ +--- +slug: /en/sql-reference/table-functions/gcs +sidebar_position: 45 +sidebar_label: s3 +keywords: [gcs, bucket] +--- + +# gcs Table Function + +Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/). + +**Syntax** + +``` sql +gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression]) +``` + +:::tip GCS +The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC. + +::: + +**Arguments** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/gcs.md#wildcards-in-path). + + :::note GCS + The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: + ``` + https://storage.googleapis.com/// + ``` + and not ~~https://storage.cloud.google.com~~. + ::: + +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. 
+ +**Examples** + +Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: + +``` sql +SELECT * +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +The similar but from file with `gzip` compression: + +``` sql +SELECT * +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## Usage + +Suppose that we have several files with following URIs on S3: + +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' + +Count the amount of rows in files ending with numbers from 1 to 3: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value 
UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +Count the total amount of rows in all files in these two directories: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: + +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +SELECT count(*) +FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +Insert data into file `test-data.csv.gz`: + +``` sql +INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +Insert data into file `test-data.csv.gz` from existing table: + +``` sql +INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +Glob ** can be used for recursive directory traversal. 
Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: + +``` sql +SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: + +``` sql +SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +## Partitioned Write + +If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. + +**Examples** + +1. Using partition ID in a key creates separate files: + +```sql +INSERT INTO TABLE FUNCTION + gcs('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') + PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); +``` +As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. + +2. Using partition ID in a bucket name creates files in different buckets: + +```sql +INSERT INTO TABLE FUNCTION + gcs('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') + PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); +``` +As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. 
+ +**See Also** + +- [S3 table function](s3.md) +- [S3 engine](../../engines/table-engines/integrations/s3.md) diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index a2e93476448..ed8cd3bd41a 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -97,6 +97,14 @@ class TableFunctionGCS : public TableFunctionS3 { public: static constexpr auto name = "gcs"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, hmac_key, hmac_secret\n" + " - url, format, structure, compression_method\n" + " - url, hmac_key, hmac_secret, format\n" + " - url, hmac_key, hmac_secret, format, structure\n" + " - url, hmac_key, hmac_secret, format, structure, compression_method"; std::string getName() const override { return name; From e2c32c3bc072e2290620d975ada42c37bcabcc52 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 13:46:37 +0100 Subject: [PATCH 009/689] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 40 ++++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 8427a2db224..dcf49a5108b 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -42,11 +42,11 @@ A table with the specified structure for reading or writing data in the specifie **Examples** -Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: +Selecting the first two rows from the table from GCS file `https://storage.googleapis.com/my-test-bucket-768/data.csv`: ``` sql SELECT * -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +FROM 
gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; ``` @@ -61,7 +61,7 @@ The similar but from file with `gzip` compression: ``` sql SELECT * -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') LIMIT 2; ``` @@ -74,22 +74,22 @@ LIMIT 2; ## Usage -Suppose that we have several files with following URIs on S3: +Suppose that we have several files with following URIs on GCS: -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 
'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_4.csv' Count the amount of rows in files ending with numbers from 1 to 3: ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') ``` ``` text @@ -102,7 +102,7 @@ Count the total amount of rows in all files in these two directories: ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') ``` ``` text @@ -119,7 +119,7 @@ Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ``` sql SELECT count(*) -FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +FROM gcs('https://storage.googleapis.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text @@ -131,32 +131,32 @@ FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768 Insert data into file `test-data.csv.gz`: ``` sql -INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') VALUES ('test-data', 1), ('test-data-2', 2); ``` Insert data into 
file `test-data.csv.gz` from existing table: ``` sql -INSERT INTO FUNCTION gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table; ``` Glob ** can be used for recursive directory traversal. Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: ``` sql -SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); ``` The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: ``` sql -SELECT * FROM gcs('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` ## Partitioned Write -If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. +If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. 
**Examples** From d0a54ab21b2107ee6893a7480533c79c2919fd75 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Tue, 21 Mar 2023 14:45:58 +0100 Subject: [PATCH 010/689] Update GCS table function docs --- docs/en/sql-reference/table-functions/gcs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index dcf49a5108b..bfa7f36fa48 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/gcs.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. 
:::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: From 576efc1da3384148664202acef1cdbd27ddc8a08 Mon Sep 17 00:00:00 2001 From: Kuba Kaflik Date: Wed, 22 Mar 2023 06:58:09 +0100 Subject: [PATCH 011/689] register GCP function in factory --- src/TableFunctions/TableFunctionS3.cpp | 5 +++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + 3 files changed, 7 insertions(+) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index f082b192ee0..6f4e6acec8a 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -183,6 +183,11 @@ void registerTableFunctionOSS(TableFunctionFactory & factory) factory.registerFunction(); } +void registerTableFunctionGCS(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + } #endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 7b2b989e724..c692173e689 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -28,6 +28,7 @@ void registerTableFunctions() registerTableFunctionS3Cluster(factory); registerTableFunctionCOS(factory); registerTableFunctionOSS(factory); + registerTableFunctionGCS(factory); registerTableFunctionHudi(factory); registerTableFunctionDeltaLake(factory); #if USE_AVRO diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 911aae199e2..af1b7129ec4 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -25,6 +25,7 @@ void registerTableFunctionS3(TableFunctionFactory & factory); void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); +void 
registerTableFunctionGCS(TableFunctionFactory & factory); void registerTableFunctionHudi(TableFunctionFactory & factory); void registerTableFunctionDeltaLake(TableFunctionFactory & factory); #if USE_AVRO From d8e8dc39e8b929ce0299f96a1be4065a74b45b99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 3 Apr 2023 01:29:55 +0200 Subject: [PATCH 012/689] Fix test --- .../test_vertical_merges_from_compact_parts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 1781ed7c976..6fdf0942ee4 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -41,7 +41,9 @@ def test_vertical_merges_from_comapact_parts(start_cluster): vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_wide_part = 0, - min_rows_for_wide_part = 100; + min_rows_for_wide_part = 100, + compress_marks = 0, + compress_primary_key = 0; """.format( i ) From 01728d6d391c07e40c42cbd50151be5de472a62e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Apr 2023 01:05:06 +0200 Subject: [PATCH 013/689] Fix tests --- .../configs/no_compress_marks.xml | 6 ++++++ .../test_backward_compatibility/configs/wide_parts_only.xml | 4 +--- tests/integration/test_backward_compatibility/test.py | 2 +- .../test_vertical_merges_from_compact_parts.py | 5 ++--- 4 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_backward_compatibility/configs/no_compress_marks.xml diff --git a/tests/integration/test_backward_compatibility/configs/no_compress_marks.xml b/tests/integration/test_backward_compatibility/configs/no_compress_marks.xml new file mode 100644 index 
00000000000..cc968525bbb --- /dev/null +++ b/tests/integration/test_backward_compatibility/configs/no_compress_marks.xml @@ -0,0 +1,6 @@ + + + 0 + 0 + + diff --git a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml index c823dd02d5a..e9cf053f1c5 100644 --- a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml +++ b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml @@ -1,7 +1,5 @@ 0 - 0 - 0 - + diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index 01ed02720f8..e4ee6aa2de9 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -12,7 +12,7 @@ node1 = cluster.add_instance( with_installed_binary=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/wide_parts_only.xml"], with_zookeeper=True + "node2", main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], with_zookeeper=True ) diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 6fdf0942ee4..4b144a37ce9 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -14,6 +14,7 @@ node_old = cluster.add_instance( ) node_new = cluster.add_instance( "node2", + main_configs=["configs/no_compress_marks.xml"], with_zookeeper=True, stay_alive=True, ) @@ -41,9 +42,7 @@ def test_vertical_merges_from_comapact_parts(start_cluster): vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_wide_part = 0, - min_rows_for_wide_part = 100, - compress_marks = 0, - 
compress_primary_key = 0; + min_rows_for_wide_part = 100; """.format( i ) From 806c7a5f9d1a6869cc464f79b6bfd83028874540 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 3 Apr 2023 23:13:50 +0000 Subject: [PATCH 014/689] Automatic style fix --- tests/integration/test_backward_compatibility/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index e4ee6aa2de9..ea1d3ab9c07 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -12,7 +12,9 @@ node1 = cluster.add_instance( with_installed_binary=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], with_zookeeper=True + "node2", + main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], + with_zookeeper=True, ) From 49c95a535ab5982a03b3dea731692893ed559806 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 12 Apr 2023 20:26:57 +0200 Subject: [PATCH 015/689] Updated to add error or completed status in zookeeper for a cluster for backup/restore, to avoid interpreting previously failed backup/restore when zookeeper is unable to remove nodes --- src/Backups/BackupCoordinationLocal.cpp | 2 +- src/Backups/BackupCoordinationLocal.h | 2 +- src/Backups/BackupCoordinationRemote.cpp | 12 ++++++---- src/Backups/BackupCoordinationRemote.h | 2 +- src/Backups/BackupCoordinationStage.h | 4 ++++ src/Backups/BackupCoordinationStageSync.cpp | 14 ++++++++++++ src/Backups/BackupCoordinationStageSync.h | 1 + src/Backups/BackupsWorker.cpp | 25 +++++++++++++-------- src/Backups/IBackupCoordination.h | 2 +- src/Backups/IRestoreCoordination.h | 2 +- src/Backups/RestoreCoordinationLocal.cpp | 2 +- src/Backups/RestoreCoordinationLocal.h | 2 +- src/Backups/RestoreCoordinationRemote.cpp | 12 ++++++---- 
src/Backups/RestoreCoordinationRemote.h | 2 +- 14 files changed, 59 insertions(+), 25 deletions(-) diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 27e0f173cf3..47b67693114 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -15,7 +15,7 @@ BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_) BackupCoordinationLocal::~BackupCoordinationLocal() = default; -void BackupCoordinationLocal::setStage(const String &, const String &) +void BackupCoordinationLocal::setStage(const String &, const String &, const bool &) { } diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 60fcc014720..1f6bb84972e 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -22,7 +22,7 @@ public: BackupCoordinationLocal(bool plain_backup_); ~BackupCoordinationLocal() override; - void setStage(const String & new_stage, const String & message) override; + void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 8e6b5db91b1..48f1ce3eef7 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -252,13 +252,17 @@ void BackupCoordinationRemote::removeAllNodes() } -void BackupCoordinationRemote::setStage(const String & new_stage, const String & message) +void BackupCoordinationRemote::setStage(const String & new_stage, const String & message, const bool & for_cluster) { - stage_sync->set(current_host, new_stage, message); + if (for_cluster) + stage_sync->setStageForCluster(new_stage); + else + 
stage_sync->set(current_host, new_stage, message); } void BackupCoordinationRemote::setError(const Exception & exception) { + stage_sync->setStageForCluster(Stage::ERROR); stage_sync->setError(current_host, exception); } @@ -777,8 +781,8 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic &) String status; if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status)) { - /// If status is not COMPLETED it could be because the backup failed, check if 'error' exists - if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_backup_path + "/error")) + /// Check if some other restore is in progress + if (status == Stage::SCHEDULED_TO_START) { LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid)); result = true; diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index 949dd9c9bf0..40ce2ae6ccc 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -33,7 +33,7 @@ public: ~BackupCoordinationRemote() override; - void setStage(const String & new_stage, const String & message) override; + void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/BackupCoordinationStage.h b/src/Backups/BackupCoordinationStage.h index 40a4b262caa..41cd66346a2 100644 --- a/src/Backups/BackupCoordinationStage.h +++ b/src/Backups/BackupCoordinationStage.h @@ -43,6 +43,10 @@ namespace BackupCoordinationStage /// Coordination stage meaning that a host finished its work. 
constexpr const char * COMPLETED = "completed"; + + /// Coordination stage meaning that backup/restore has failed due to an error + /// Check '/error' for the error message + constexpr const char * ERROR = "error"; } } diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp index effb00085c3..5cbeec0ec76 100644 --- a/src/Backups/BackupCoordinationStageSync.cpp +++ b/src/Backups/BackupCoordinationStageSync.cpp @@ -61,6 +61,20 @@ void BackupCoordinationStageSync::set(const String & current_host, const String }); } +void BackupCoordinationStageSync::setStageForCluster(const String & new_stage) +{ + auto holder = with_retries.createRetriesControlHolder("setStageForCluster"); + holder.retries_ctl.retryLoop( + [&, &zookeeper = holder.faulty_zookeeper]() + { + with_retries.renewZooKeeper(zookeeper); + zookeeper->trySet(zookeeper_path, new_stage); + auto code = zookeeper->trySet(zookeeper_path, new_stage); + if (code != Coordination::Error::ZOK) + throw zkutil::KeeperException(code, zookeeper_path); + }); +} + void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception) { auto holder = with_retries.createRetriesControlHolder("setError"); diff --git a/src/Backups/BackupCoordinationStageSync.h b/src/Backups/BackupCoordinationStageSync.h index 56081f8779c..9dde4e3095f 100644 --- a/src/Backups/BackupCoordinationStageSync.h +++ b/src/Backups/BackupCoordinationStageSync.h @@ -16,6 +16,7 @@ public: /// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that. void set(const String & current_host, const String & new_stage, const String & message); + void setStageForCluster(const String & new_stage); void setError(const String & current_host, const Exception & exception); /// Sets the stage of the current host and waits until all hosts come to the same stage. 
diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 4b17174a8de..aae9cfd620f 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -368,6 +368,7 @@ void BackupsWorker::doBackup( /// Wait until all the hosts have written their backup entries. backup_coordination->waitForStage(Stage::COMPLETED); + backup_coordination->setStage(Stage::COMPLETED, /* message */ "", /* for_cluster */ true); } else { @@ -654,12 +655,26 @@ void BackupsWorker::doRestore( /// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.) ClusterPtr cluster; bool on_cluster = !restore_query->cluster.empty(); + if (on_cluster) { restore_query->cluster = context->getMacros()->expand(restore_query->cluster); cluster = context->getCluster(restore_query->cluster); restore_settings.cluster_host_ids = cluster->getHostIDs(); + } + /// Make a restore coordination. + if (!restore_coordination) + restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster); + + if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores))) + throw Exception( + ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, + "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'"); + + + if (on_cluster) + { /// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect /// because different replicas can contain different set of tables and so the required access rights can differ too. /// So the right way is pass through the entire cluster and check access for each host. @@ -676,15 +691,6 @@ void BackupsWorker::doRestore( } } - /// Make a restore coordination. 
- if (!restore_coordination) - restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster); - - if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores))) - throw Exception( - ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, - "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'"); - /// Do RESTORE. if (on_cluster) { @@ -703,6 +709,7 @@ void BackupsWorker::doRestore( /// Wait until all the hosts have written their backup entries. restore_coordination->waitForStage(Stage::COMPLETED); + restore_coordination->setStage(Stage::COMPLETED, /* message */ "", /* for_cluster */ true); } else { diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 75d9202374b..614e6a16db8 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -21,7 +21,7 @@ public: virtual ~IBackupCoordination() = default; /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void setStage(const String & new_stage, const String & message) = 0; + virtual void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) = 0; virtual void setError(const Exception & exception) = 0; virtual Strings waitForStage(const String & stage_to_wait) = 0; virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0; diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index 2f9e8d171f6..599a698a1f9 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -18,7 +18,7 @@ public: virtual ~IRestoreCoordination() = default; /// Sets the current stage and waits for other hosts to come to this stage too. 
- virtual void setStage(const String & new_stage, const String & message) = 0; + virtual void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) = 0; virtual void setError(const Exception & exception) = 0; virtual Strings waitForStage(const String & stage_to_wait) = 0; virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0; diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index 068c4fe7e52..f689277f5b6 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -11,7 +11,7 @@ RestoreCoordinationLocal::RestoreCoordinationLocal() : log(&Poco::Logger::get("R RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; -void RestoreCoordinationLocal::setStage(const String &, const String &) +void RestoreCoordinationLocal::setStage(const String &, const String &, const bool &) { } diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index e27f0d1ef88..4456ad966d4 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -19,7 +19,7 @@ public: ~RestoreCoordinationLocal() override; /// Sets the current stage and waits for other hosts to come to this stage too. 
- void setStage(const String & new_stage, const String & message) override; + void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index cc03f0c4a2a..0a89b1cd4e7 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -90,13 +90,17 @@ void RestoreCoordinationRemote::createRootNodes() }); } -void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message) +void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message, const bool & for_cluster) { - stage_sync->set(current_host, new_stage, message); + if (for_cluster) + stage_sync->setStageForCluster(new_stage); + else + stage_sync->set(current_host, new_stage, message); } void RestoreCoordinationRemote::setError(const Exception & exception) { + stage_sync->setStageForCluster(Stage::ERROR); stage_sync->setError(current_host, exception); } @@ -282,8 +286,8 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic String status; if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status)) { - /// If status is not COMPLETED it could be because the restore failed, check if 'error' exists - if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_restore_path + "/error")) + /// Check if some other restore is in progress + if (status == Stage::SCHEDULED_TO_START) { LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid)); result = true; diff --git a/src/Backups/RestoreCoordinationRemote.h b/src/Backups/RestoreCoordinationRemote.h index 
eb0fcff9c2d..21a38f01fa6 100644 --- a/src/Backups/RestoreCoordinationRemote.h +++ b/src/Backups/RestoreCoordinationRemote.h @@ -26,7 +26,7 @@ public: ~RestoreCoordinationRemote() override; /// Sets the current stage and waits for other hosts to come to this stage too. - void setStage(const String & new_stage, const String & message) override; + void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; From d4b2297e9fa53336cd4d05919a1048ad742018cd Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 13 Apr 2023 09:53:39 +0200 Subject: [PATCH 016/689] Fixed comment --- src/Backups/BackupCoordinationRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 48f1ce3eef7..cd4901eb5ae 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -781,7 +781,7 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic &) String status; if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status)) { - /// Check if some other restore is in progress + /// Check if some other backup is in progress if (status == Stage::SCHEDULED_TO_START) { LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid)); From cf5d9a175ae7c61ad0a7293fd49a86a4f7e09f01 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 14 Apr 2023 16:34:19 +0200 Subject: [PATCH 017/689] Revert "Merge pull request #48760 from ClickHouse/revert-46089-background-memory-tracker" This reverts commit a61ed332239e4a35af1b9cd31479560da6f08cca, reversing changes made to 5f01b8a2b59f6f340c65bfbc4ed0dd655d997ff9. 
--- .../settings.md | 33 ++++++++++++++-- programs/server/Server.cpp | 20 ++++++++++ src/Common/CurrentMetrics.cpp | 1 + src/Common/MemoryTracker.cpp | 8 ++++ src/Common/MemoryTracker.h | 19 ++++++++++ src/Core/ServerSettings.h | 2 + src/Interpreters/ThreadStatusExt.cpp | 1 + src/Storages/MergeTree/MergeList.cpp | 5 +++ src/Storages/MergeTree/MergeList.h | 2 + src/Storages/StorageMergeTree.cpp | 11 +++++- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++- .../test_merges_memory_limit/__init__.py | 0 .../test_merges_memory_limit/test.py | 38 +++++++++++++++++++ 13 files changed, 145 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_merges_memory_limit/__init__.py create mode 100644 tests/integration/test_merges_memory_limit/test.py diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 7c97d0ab640..a9f0cc276ff 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1045,7 +1045,7 @@ Default value: `0`. ## background_pool_size {#background_pool_size} -Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. +Sets the number of threads performing background merges and mutations for tables with MergeTree engines. 
This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. Before changing it, please also take a look at related MergeTree settings, such as [number_of_free_entries_in_pool_to_lower_max_size_of_merge](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-lower-max-size-of-merge) and [number_of_free_entries_in_pool_to_execute_mutation](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-execute-mutation). @@ -1063,8 +1063,8 @@ Default value: 16. ## background_merges_mutations_concurrency_ratio {#background_merges_mutations_concurrency_ratio} -Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example if the ratio equals to 2 and -`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operation could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server. +Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example, if the ratio equals to 2 and +`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operations could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. 
To lower it you have to restart the server. The same as for `background_pool_size` setting `background_merges_mutations_concurrency_ratio` could be applied from the `default` profile for backward compatibility. Possible values: @@ -1079,6 +1079,33 @@ Default value: 2. 3 ``` +## merges_mutations_memory_usage_soft_limit {#merges_mutations_memory_usage_soft_limit} + +Sets the limit on how much RAM is allowed to use for performing merge and mutation operations. +Zero means unlimited. +If ClickHouse reaches this limit, it won't schedule any new background merge or mutation operations but will continue to execute already scheduled tasks. + +Possible values: + +- Any positive integer. + +**Example** + +```xml +0 +``` + +## merges_mutations_memory_usage_to_ram_ratio {#merges_mutations_memory_usage_to_ram_ratio} + +The default `merges_mutations_memory_usage_soft_limit` value is calculated as `memory_amount * merges_mutations_memory_usage_to_ram_ratio`. + +Default value: `0.5`. + +**See also** + +- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) +- [merges_mutations_memory_usage_soft_limit](#merges_mutations_memory_usage_soft_limit) + ## background_merges_mutations_scheduling_policy {#background_merges_mutations_scheduling_policy} Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart. 
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8c0d50bae55..cba7a4c4778 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -135,6 +135,7 @@ namespace CurrentMetrics extern const Metric Revision; extern const Metric VersionInteger; extern const Metric MemoryTracking; + extern const Metric MergesMutationsMemoryTracking; extern const Metric MaxDDLEntryID; extern const Metric MaxPushedDDLEntryID; } @@ -1225,6 +1226,25 @@ try total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; + + size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) + { + merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; + LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" + " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), + formatReadableSizeWithBinarySuffix(memory_amount), + server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + + LOG_INFO(log, "Merges and mutations memory limit is set to {}", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit)); + background_memory_tracker.setSoftLimit(merges_mutations_memory_usage_soft_limit); + background_memory_tracker.setDescription("(background)"); + background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking); + total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory); auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); 
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 81c1481e2de..ea248d996a7 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -53,6 +53,7 @@ M(QueryThread, "Number of query processing threads") \ M(ReadonlyReplica, "Number of Replicated tables that are currently in readonly state due to re-initialization after ZooKeeper session loss or due to startup without ZooKeeper configured.") \ M(MemoryTracking, "Total amount of memory (bytes) allocated by the server.") \ + M(MergesMutationsMemoryTracking, "Total amount of memory (bytes) allocated by background tasks (merges and mutations).") \ M(EphemeralNode, "Number of ephemeral nodes hold in ZooKeeper.") \ M(ZooKeeperSession, "Number of sessions (connections) to ZooKeeper. Should be no more than one, because using more than one connection to ZooKeeper may lead to bugs due to lack of linearizability (stale reads) that ZooKeeper consistency model allows.") \ M(ZooKeeperWatch, "Number of watches (event subscriptions) in ZooKeeper.") \ diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 674d8d469af..9bff365483d 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -96,6 +96,7 @@ using namespace std::chrono_literals; static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; MemoryTracker total_memory_tracker(nullptr, VariableContext::Global); +MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User); std::atomic MemoryTracker::free_memory_in_allocator_arenas; @@ -528,3 +529,10 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value)) ; } + +bool canEnqueueBackgroundTask() +{ + auto limit = background_memory_tracker.getSoftLimit(); + auto amount = background_memory_tracker.get(); + return limit == 0 || amount < limit; +} diff --git a/src/Common/MemoryTracker.h 
b/src/Common/MemoryTracker.h index 0d7748856bd..260005fd536 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -110,6 +110,22 @@ public: return amount.load(std::memory_order_relaxed); } + // Merges and mutations may pass memory ownership to other threads thus in the end of execution + // MemoryTracker for background task may have a non-zero counter. + // This method is intended to fix the counter inside of background_memory_tracker. + // NOTE: We can't use alloc/free methods to do it, because they also will change the value inside + // of total_memory_tracker. + void adjustOnBackgroundTaskEnd(const MemoryTracker * child) + { + auto background_memory_consumption = child->amount.load(std::memory_order_relaxed); + amount.fetch_sub(background_memory_consumption, std::memory_order_relaxed); + + // Also fix CurrentMetrics::MergesMutationsMemoryTracking + auto metric_loaded = metric.load(std::memory_order_relaxed); + if (metric_loaded != CurrentMetrics::end()) + CurrentMetrics::sub(metric_loaded, background_memory_consumption); + } + Int64 getPeak() const { return peak.load(std::memory_order_relaxed); @@ -220,3 +236,6 @@ public: }; extern MemoryTracker total_memory_tracker; +extern MemoryTracker background_memory_tracker; + +bool canEnqueueBackgroundTask(); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index aabc89cc6d7..2d8c37783db 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -40,6 +40,8 @@ namespace DB M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ M(UInt64, max_server_memory_usage, 0, "Limit on total memory usage. Zero means Unlimited.", 0) \ M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to ram ratio. Allows to lower max memory on low-memory systems.", 0) \ + M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. 
Zero means Unlimited.", 0) \ + M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to ram ratio. Allows to lower memory limit on low-memory systems.", 0) \ M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ \ M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \ diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index fd4a6b5e996..559652fe56c 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -84,6 +84,7 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); + group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) group->memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 0bf662921ad..d54079bc7a5 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -78,4 +78,9 @@ MergeInfo MergeListElement::getInfo() const return res; } +MergeListElement::~MergeListElement() +{ + background_memory_tracker.adjustOnBackgroundTaskEnd(&getMemoryTracker()); +} + } diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 9c8c2ebd1e4..d8271a66b45 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -113,6 +113,8 @@ struct MergeListElement : boost::noncopyable MergeListElement * ptr() { return this; } MergeListElement & ref() { return *this; } + + ~MergeListElement(); }; /** Maintains a list 
of currently running merges. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 34bf5d55270..6cb3ce35e5b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include namespace DB { @@ -918,7 +920,14 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - if (partition_id.empty()) + if (!canEnqueueBackgroundTask()) + { + if (out_disable_reason) + *out_disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", + formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), + formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); + } + else if (partition_id.empty()) { UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge(); bool merge_with_ttl_allowed = getTotalMergesWithTTLInMergeList() < data_settings->max_number_of_merges_with_ttl_in_pool; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 5cd02c33d55..4a36cf03c2a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -3223,7 +3224,14 @@ void StorageReplicatedMergeTree::mergeSelectingTask() auto merges_and_mutations_queued = queue.countMergesAndPartMutations(); size_t merges_and_mutations_sum = merges_and_mutations_queued.merges + merges_and_mutations_queued.mutations; - if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) + if (!canEnqueueBackgroundTask()) + { + LOG_TRACE(log, "Reached memory limit for the background tasks ({}), so won't select new parts to merge or mutate. "
+ "Current background tasks memory usage: {}.", + formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit()), + formatReadableSizeWithBinarySuffix(background_memory_tracker.get())); + } + else if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({})" " is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", diff --git a/tests/integration/test_merges_memory_limit/__init__.py b/tests/integration/test_merges_memory_limit/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_merges_memory_limit/test.py b/tests/integration/test_merges_memory_limit/test.py new file mode 100644 index 00000000000..04729f3a01c --- /dev/null +++ b/tests/integration/test_merges_memory_limit/test.py @@ -0,0 +1,38 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_memory_limit_success(): + node.query( + "CREATE TABLE test_merge_oom ENGINE=AggregatingMergeTree ORDER BY id EMPTY AS SELECT number%1024 AS id, arrayReduce( 'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(20000)" + ) + node.query("SYSTEM STOP MERGES test_merge_oom") + node.query( + "INSERT INTO test_merge_oom SELECT number%1024 AS id, arrayReduce( 'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(10000)" + ) + node.query( + "INSERT INTO test_merge_oom SELECT number%1024 AS id, arrayReduce( 'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(10000)" + ) + node.query( + "INSERT INTO test_merge_oom SELECT number%1024 AS id, arrayReduce( 
'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(10000)" + ) + _, error = node.query_and_get_answer_with_error( + "SYSTEM START MERGES test_merge_oom;SET optimize_throw_if_noop=1;OPTIMIZE TABLE test_merge_oom FINAL" + ) + + assert not error + node.query("DROP TABLE test_merge_oom") From a1f21a5fc427e860199aa9e1db3c1c4177a340f7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 14 Apr 2023 16:00:32 +0200 Subject: [PATCH 018/689] Do not log peak for background memory tracker --- src/Common/MemoryTracker.cpp | 8 ++++++-- src/Common/MemoryTracker.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 9bff365483d..81cac2617c5 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -96,13 +96,17 @@ using namespace std::chrono_literals; static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; MemoryTracker total_memory_tracker(nullptr, VariableContext::Global); -MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User); +MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User, false); std::atomic MemoryTracker::free_memory_in_allocator_arenas; MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {} MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {} - +MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_, bool log_peak_memory_usage_in_destructor_) + : parent(parent_) + , log_peak_memory_usage_in_destructor(log_peak_memory_usage_in_destructor_) + , level(level_) +{} MemoryTracker::~MemoryTracker() { diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 260005fd536..4e29d40c953 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -98,6 +98,7 @@ public: explicit 
MemoryTracker(VariableContext level_ = VariableContext::Thread); explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread); + MemoryTracker(MemoryTracker * parent_, VariableContext level_, bool log_peak_memory_usage_in_destructor_); ~MemoryTracker(); From 74c6ca558b3301427368941f3e0df031b04cc10d Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 14 Apr 2023 18:03:46 +0200 Subject: [PATCH 019/689] Removed line from test_disallow_concurrrency for CI checks --- .../test_backup_restore_on_cluster/test_disallow_concurrency.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 0d8fad96438..a76af00d339 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -6,7 +6,6 @@ import concurrent from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV, assert_eq_with_retry - cluster = ClickHouseCluster(__file__) num_nodes = 10 From 93572ab42768195fccc77a809e149355a3f8065d Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 15 Apr 2023 13:43:04 +0200 Subject: [PATCH 020/689] Removed parameter from setStage function and added function setStageForCluster --- src/Backups/BackupCoordinationLocal.cpp | 6 +++++- src/Backups/BackupCoordinationLocal.h | 3 ++- src/Backups/BackupCoordinationRemote.cpp | 12 +++++++----- src/Backups/BackupCoordinationRemote.h | 3 ++- src/Backups/BackupsWorker.cpp | 4 ++-- src/Backups/IBackupCoordination.h | 3 ++- src/Backups/IRestoreCoordination.h | 3 ++- src/Backups/RestoreCoordinationLocal.cpp | 6 +++++- src/Backups/RestoreCoordinationLocal.h | 3 ++- src/Backups/RestoreCoordinationRemote.cpp | 12 +++++++----- src/Backups/RestoreCoordinationRemote.h | 3 ++- 11 files changed, 38 insertions(+), 20 
deletions(-) diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 47b67693114..5b7ee37618b 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -15,7 +15,11 @@ BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_) BackupCoordinationLocal::~BackupCoordinationLocal() = default; -void BackupCoordinationLocal::setStage(const String &, const String &, const bool &) +void BackupCoordinationLocal::setStage(const String &, const String &) +{ +} + +void BackupCoordinationLocal::setStageForCluster(const String &) { } diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 1f6bb84972e..f1ffa8e8517 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -22,7 +22,8 @@ public: BackupCoordinationLocal(bool plain_backup_); ~BackupCoordinationLocal() override; - void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; + void setStage(const String & new_stage, const String & message) override; + void setStageForCluster(const String & new_stage) override; /// Sets stage for cluster void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index cd4901eb5ae..c5c4efa3530 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -252,12 +252,14 @@ void BackupCoordinationRemote::removeAllNodes() } -void BackupCoordinationRemote::setStage(const String & new_stage, const String & message, const bool & for_cluster) +void BackupCoordinationRemote::setStage(const String & new_stage, const String & message) { - if (for_cluster) - 
stage_sync->setStageForCluster(new_stage); - else - stage_sync->set(current_host, new_stage, message); + stage_sync->set(current_host, new_stage, message); +} + +void BackupCoordinationRemote::setStageForCluster(const String & new_stage) +{ + stage_sync->setStageForCluster(new_stage); } void BackupCoordinationRemote::setError(const Exception & exception) diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index 40ce2ae6ccc..c659cb0d459 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -33,7 +33,8 @@ public: ~BackupCoordinationRemote() override; - void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; + void setStage(const String & new_stage, const String & message) override; + void setStageForCluster(const String & new_stage) override; /// Sets stage for cluster void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index aae9cfd620f..de05cc2b092 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -368,7 +368,7 @@ void BackupsWorker::doBackup( /// Wait until all the hosts have written their backup entries. backup_coordination->waitForStage(Stage::COMPLETED); - backup_coordination->setStage(Stage::COMPLETED, /* message */ "", /* for_cluster */ true); + backup_coordination->setStageForCluster(Stage::COMPLETED); } else { @@ -709,7 +709,7 @@ void BackupsWorker::doRestore( /// Wait until all the hosts have written their backup entries. 
restore_coordination->waitForStage(Stage::COMPLETED); - restore_coordination->setStage(Stage::COMPLETED, /* message */ "", /* for_cluster */ true); + restore_coordination->setStageForCluster(Stage::COMPLETED); } else { diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 614e6a16db8..6caae1dd741 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -21,7 +21,8 @@ public: virtual ~IBackupCoordination() = default; /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) = 0; + virtual void setStage(const String & new_stage, const String & message) = 0; + virtual void setStageForCluster(const String & new_stage) = 0; virtual void setError(const Exception & exception) = 0; virtual Strings waitForStage(const String & stage_to_wait) = 0; virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0; diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index 599a698a1f9..a5c8db84c86 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -18,7 +18,8 @@ public: virtual ~IRestoreCoordination() = default; /// Sets the current stage and waits for other hosts to come to this stage too. 
- virtual void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) = 0; + virtual void setStage(const String & new_stage, const String & message) = 0; + virtual void setStageForCluster(const String & new_stage) = 0; /// Sets stage for cluster virtual void setError(const Exception & exception) = 0; virtual Strings waitForStage(const String & stage_to_wait) = 0; virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0; diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index f689277f5b6..513204c931c 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -11,7 +11,11 @@ RestoreCoordinationLocal::RestoreCoordinationLocal() : log(&Poco::Logger::get("R RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; -void RestoreCoordinationLocal::setStage(const String &, const String &, const bool &) +void RestoreCoordinationLocal::setStage(const String &, const String &) +{ +} + +void RestoreCoordinationLocal::setStageForCluster(const String &) { } diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index 4456ad966d4..0e4f4f01846 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -19,7 +19,8 @@ public: ~RestoreCoordinationLocal() override; /// Sets the current stage and waits for other hosts to come to this stage too. 
- void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; + void setStage(const String & new_stage, const String & message) override; + void setStageForCluster(const String & new_stage) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 0a89b1cd4e7..2c2187a1eb5 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -90,12 +90,14 @@ void RestoreCoordinationRemote::createRootNodes() }); } -void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message, const bool & for_cluster) +void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message) { - if (for_cluster) - stage_sync->setStageForCluster(new_stage); - else - stage_sync->set(current_host, new_stage, message); + stage_sync->set(current_host, new_stage, message); +} + +void RestoreCoordinationRemote::setStageForCluster(const String & new_stage) +{ + stage_sync->setStageForCluster(new_stage); } void RestoreCoordinationRemote::setError(const Exception & exception) diff --git a/src/Backups/RestoreCoordinationRemote.h b/src/Backups/RestoreCoordinationRemote.h index 21a38f01fa6..947d08a66e5 100644 --- a/src/Backups/RestoreCoordinationRemote.h +++ b/src/Backups/RestoreCoordinationRemote.h @@ -26,7 +26,8 @@ public: ~RestoreCoordinationRemote() override; /// Sets the current stage and waits for other hosts to come to this stage too. 
- void setStage(const String & new_stage, const String & message, const bool & for_cluster = false) override; + void setStage(const String & new_stage, const String & message) override; + void setStageForCluster(const String & new_stage) override; void setError(const Exception & exception) override; Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; From 6f501f983ed0f9d63df4abdbbe25afe64283776b Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 17 Apr 2023 17:47:03 +0200 Subject: [PATCH 021/689] Update test.py --- tests/integration/test_merges_memory_limit/test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration/test_merges_memory_limit/test.py b/tests/integration/test_merges_memory_limit/test.py index 04729f3a01c..22bf143f4f7 100644 --- a/tests/integration/test_merges_memory_limit/test.py +++ b/tests/integration/test_merges_memory_limit/test.py @@ -27,9 +27,6 @@ def test_memory_limit_success(): node.query( "INSERT INTO test_merge_oom SELECT number%1024 AS id, arrayReduce( 'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(10000)" ) - node.query( - "INSERT INTO test_merge_oom SELECT number%1024 AS id, arrayReduce( 'groupArrayState', arrayMap( x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(10000)" - ) _, error = node.query_and_get_answer_with_error( "SYSTEM START MERGES test_merge_oom;SET optimize_throw_if_noop=1;OPTIMIZE TABLE test_merge_oom FINAL" ) From 8cc425cd2946243f3756eed577d128705e781ceb Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Tue, 18 Apr 2023 00:14:35 -0700 Subject: [PATCH 022/689] INTO OUTFILE enhancements --- src/Client/ClientBase.cpp | 14 +++++++++++++- src/Parsers/ASTQueryWithOutput.h | 1 + src/Parsers/ParserQueryWithOutput.cpp | 6 ++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp 
b/src/Client/ClientBase.cpp index 64a18479ca9..91c8c7e6e79 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -571,6 +571,12 @@ try CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string); UInt64 compression_level = 3; + if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None) { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot append to compressed file. Please use uncompressed file or remove APPEND keyword."); + } + if (query_with_output->compression_level) { const auto & compression_level_node = query_with_output->compression_level->as(); @@ -585,8 +591,14 @@ range.second); } + auto flags = O_WRONLY | O_EXCL; + if (query_with_output->is_outfile_append) + flags |= O_APPEND; + else + flags |= O_CREAT; + out_file_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), + std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, flags), compression_method, static_cast(compression_level) ); diff --git a/src/Parsers/ASTQueryWithOutput.h b/src/Parsers/ASTQueryWithOutput.h index 892d911e2e2..09f08772468 100644 --- a/src/Parsers/ASTQueryWithOutput.h +++ b/src/Parsers/ASTQueryWithOutput.h @@ -16,6 +16,7 @@ class ASTQueryWithOutput : public IAST public: ASTPtr out_file; bool is_into_outfile_with_stdout; + bool is_outfile_append = false; ASTPtr format; ASTPtr settings_ast; ASTPtr compression; diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 2fb7c406d74..1ba44fc9939 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -100,6 +100,12 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!out_file_p.parse(pos, query_with_output.out_file, expected)) return false; + ParserKeyword s_append("APPEND"); + if (s_append.ignore(pos, expected)) + { + query_with_output.is_outfile_append = true;
+ } + ParserKeyword s_stdout("AND STDOUT"); if (s_stdout.ignore(pos, expected)) { From 2efc7492270a7583654d88cb4a5391d01dccab92 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Tue, 18 Apr 2023 00:30:00 -0700 Subject: [PATCH 023/689] Add test for INTO FILE ... APPEND. --- .../02001_append_output_file.reference | 2 ++ .../0_stateless/02001_append_output_file.sh | 15 +++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02001_append_output_file.reference create mode 100755 tests/queries/0_stateless/02001_append_output_file.sh diff --git a/tests/queries/0_stateless/02001_append_output_file.reference b/tests/queries/0_stateless/02001_append_output_file.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/02001_append_output_file.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. diff --git a/tests/queries/0_stateless/02001_append_output_file.sh b/tests/queries/0_stateless/02001_append_output_file.sh new file mode 100755 index 00000000000..5cbae5b8cb3 --- /dev/null +++ b/tests/queries/0_stateless/02001_append_output_file.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_append_to_output_file] && rm "${CLICKHOUSE_TMP}"/test_append_to_output_file + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file'" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file' APPEND" +cat ${CLICKHOUSE_TMP}/test_append_to_output_file + +rm -f "${CLICKHOUSE_TMP}/test_append_to_output_file" From 8874ede98a05a5e41670e5c9a666c2eaa6faced0 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Tue, 18 Apr 2023 00:56:14 -0700 Subject: [PATCH 024/689] Fix style in test script. --- tests/queries/0_stateless/02001_append_output_file.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02001_append_output_file.sh b/tests/queries/0_stateless/02001_append_output_file.sh index 5cbae5b8cb3..47ac0183d91 100755 --- a/tests/queries/0_stateless/02001_append_output_file.sh +++ b/tests/queries/0_stateless/02001_append_output_file.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -[ -e "${CLICKHOUSE_TMP}"/test_append_to_output_file] && rm "${CLICKHOUSE_TMP}"/test_append_to_output_file +[ -e "${CLICKHOUSE_TMP}"/test_append_to_output_file ] && rm "${CLICKHOUSE_TMP}"/test_append_to_output_file ${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file'" ${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file' APPEND" From 3fe36a3db6215b57bd751362914f6da4ab93a14d Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 9 Apr 2023 12:50:26 +0000 Subject: [PATCH 025/689] wip AsyncLoader --- src/Common/AsyncLoader.h | 556 ++++++++++++++++++++++++++++++++++++++ src/Common/ErrorCodes.cpp | 3 + 2 files changed, 559 insertions(+) create mode 100644 src/Common/AsyncLoader.h diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h new file mode 100644 index 00000000000..dd35621443d --- /dev/null +++ b/src/Common/AsyncLoader.h @@ -0,0 +1,556 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class LoadJob; +using LoadJobPtr = std::shared_ptr; +using LoadJobSet = std::unordered_set; +class LoadTask; +class AsyncLoader; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ASYNC_LOAD_FAILED; + extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_DEPENDENCY_FAILED; +} + +enum class LoadStatus +{ + PENDING, // Load is not finished yet + SUCCESS, // Load was successful + FAILED // Load failed or canceled +}; + +class LoadJob : private boost::noncopyable +{ +public: + template + LoadJob(LoadJobSet && dependencies_, const String & name_, Func && func_) + : dependencies(std::move(dependencies_)) + , name(name_) + , func(std::forward(func_)) + {} + + LoadStatus status() const + { + std::unique_lock lock{mutex}; + return !is_finished ? LoadStatus::PENDING : (exception ? 
LoadStatus::FAILED : LoadStatus::SUCCESS); + } + + void wait() + { + std::unique_lock lock{mutex}; + finished.wait(lock, [this] { return is_finished; }); + } + + void get() + { + std::unique_lock lock{mutex}; + finished.wait(lock, [this] { return is_finished; }); + if (exception) + std::rethrow_exception(exception); + } + + const LoadJobSet dependencies; // jobs to be done before this one, with ownership + const String name; + std::atomic priority{0}; + +private: + friend class AsyncLoader; + + void setSuccess() + { + std::unique_lock lock{mutex}; + is_finished = true; + finished.notify_all(); + } + + void setFailure(const std::exception_ptr & ptr) + { + std::unique_lock lock{mutex}; + is_finished = true; + exception = ptr; + finished.notify_all(); + } + + std::function func; + + std::mutex mutex; + std::condition_variable finished; + bool is_finished = false; + std::exception_ptr exception; +}; + +template +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && func) +{ + return std::make_shared(std::move(dependencies), name, std::forward(func)); +} + +// TODO(serxa): write good comment +class AsyncLoader : private boost::noncopyable +{ +private: + // Key of a pending job in ready queue + struct ReadyKey { + ssize_t priority; + UInt64 ready_seqno; + + bool operator<(const ReadyKey & rhs) const + { + if (priority == rhs.priority) + return ready_seqno < rhs.ready_seqno; + return priority > rhs.priority; + } + }; + + // Scheduling information for a pending job + struct Info { + ssize_t priority = 0; + size_t dependencies_left = 0; + UInt64 ready_seqno = 0; // zero means that job is not in ready queue + LoadJobSet dependent_jobs; // TODO(serxa): clean it on remove jobs + + ReadyKey key() const + { + return {priority, ready_seqno}; + } + }; + +public: + using Metric = CurrentMetrics::Metric; + + // Helper class that removes all not started jobs in destructor and wait all executing jobs to finish + class Task + { + public: + Task() + : 
loader(nullptr) + {} + + Task(AsyncLoader * loader_, LoadJobSet && jobs_) + : loader(loader_) + , jobs(std::move(jobs_)) + {} + + Task(const Task & o) = delete; + Task & operator=(const Task & o) = delete; + + Task(Task && o) noexcept + : loader(std::exchange(o.loader, nullptr)) + , jobs(std::move(o.jobs)) + {} + + Task & operator=(Task && o) noexcept + { + loader = std::exchange(o.loader, nullptr); + jobs = std::move(o.jobs); + return *this; + } + + void merge(Task && o) + { + if (!loader) + { + *this = std::move(o); + } + else + { + chassert(loader == o.loader); + jobs.merge(o.jobs); + o.loader = nullptr; + } + } + + ~Task() + { + // Remove jobs that are not ready and wait for jobs that are in progress + if (loader) + loader->remove(jobs); + } + + // Do not track jobs in this task + void detach() + { + loader = nullptr; + jobs.clear(); + } + + private: + AsyncLoader * loader; + LoadJobSet jobs; + }; + + AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_) + : max_threads(max_threads_) + , pool(metric_threads, metric_active_threads, max_threads) + {} + + void start() + { + std::unique_lock lock{mutex}; + is_running = true; + for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) + spawn(lock); + } + + void stop() + { + std::unique_lock lock{mutex}; + is_stopping = true; + } + + [[nodiscard]] Task schedule(LoadJobSet && jobs, ssize_t priority = 0) + { + std::unique_lock lock{mutex}; + + // Sanity checks + for (const auto & job : jobs) + { + if (job->status() != LoadStatus::PENDING) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to schedule already finished load job '{}'", job->name); + if (scheduled_jobs.contains(job)) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' has been already scheduled", job->name); + } + + // TODO(serxa): ensure scheduled_jobs graph will have no cycles, otherwise we have infinite recursion and other strange stuff? 
+ + // We do not want any exception to be throws after this point, because the following code is not exception-safe + DENY_ALLOCATIONS_IN_SCOPE; + + // Schedule all incoming jobs + for (const auto & job : jobs) + { + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + scheduled_jobs.emplace(job, Info{.priority = priority}); + )}; + job->priority.store(priority); // Set user-facing priority + } + + // Process incoming dependencies + for (const auto & job : jobs) + { + Info & info = scheduled_jobs.find(job)->second; + for (const auto & dep : job->dependencies) + { + // Register every dependency on scheduled job with back-link to dependent job + if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) + { + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + dep_info->second.dependent_jobs.insert(job); + }); + info.dependencies_left++; + } + else + { + // TODO(serxa): check status: (1) pending: it is wrong - throw? (2) success: good - no dep. (3) failed: propagate failure! 
+ } + + // Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion + prioritize(dep, priority, lock); + } + + // Place jobs w/o dependencies to ready queue + if (info.dependencies_left == 0) + enqueue(info, job, lock); + } + + return Task(this, std::move(jobs)); + } + + // Increase priority of a job and all its dependencies recursively + void prioritize(const LoadJobPtr & job, ssize_t new_priority) + { + DENY_ALLOCATIONS_IN_SCOPE; + std::unique_lock lock{mutex}; + prioritize(job, new_priority, lock); + } + + // Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them + void remove(const LoadJobSet & jobs) + { + DENY_ALLOCATIONS_IN_SCOPE; + std::unique_lock lock{mutex}; + for (const auto & job : jobs) + { + if (auto it = finished_jobs.find(job); it != finished_jobs.end()) // Job is already finished + { + finished_jobs.erase(it); + } + else if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + if (info->second.dependencies_left > 0) // Job is not ready yet + canceled(job); + else if (info->second.ready_seqno) // Job is enqueued in ready queue + { + ready_queue.erase(info->second.key()); + info->second.ready_seqno = 0; + canceled(job); + } + else // Job is currently executing + { + lock.unlock(); + job->wait(); // Wait for job to finish + lock.lock(); + } + finished_jobs.erase(job); + } + } + } + +private: + void canceled(const LoadJobPtr & job, std::unique_lock &) + { + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr( + Exception(ASYNC_LOAD_CANCELED, + "Load job '{}' canceled", + job->name)); + )}; + failed(job, e, lock); + } + + void loaded(const LoadJobPtr & job, std::unique_lock & lock) + { + // Notify waiters + job->setSuccess(); + + // Update dependent jobs and enqueue if ready + chassert(scheduled_jobs.contains(job)); // Job was pending + for (const auto & dep : scheduled_jobs[job].dependent_jobs) + { + 
chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending + Info & dep_info = scheduled_jobs[dep]; + if (--dep_info.dependencies_left == 0) + enqueue(dep_info, dep, lock); + } + + finish(job); + } + + void failed(const LoadJobPtr & job, std::exception_ptr exception_from_job, std::unique_lock & lock) + { + // Notify waiters + job->setFailure(exception_from_job); + + // Recurse into all dependent jobs + chassert(scheduled_jobs.contains(job)); // Job was pending + Info & info = scheduled_jobs[job]; + LoadJobSet dependent; + dependent.swap(info.dependent_jobs); // To avoid container modification during recursion + for (const auto & dep : dependent) + { + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr( + Exception(ASYNC_LOAD_DEPENDENCY_FAILED, + "Load job '{}' -> {}", + dep->name, + getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); + }); + failed(dep, e, lock); + } + + // Clean dependency graph edges + for (const auto & dep : job->dependencies) + if (auto dep_info = scheduled_jobs.find(dep); info != scheduled_jobs.end()) + dep_info->second.dependent_jobs.erase(job); + + // Job became finished + finish(job); + } + + void finish(const LoadJobPtr & job, std::unique_lock &) + { + scheduled_jobs.erase(job); + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + finished_jobs.insert(job); + )}; + } + + void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock & lock) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + if (info->second.priority >= new_priority) + return; // Never lower priority + + // Update priority and push job forward through ready queue if needed + if (info->second.ready_seqno) + ready_queue.erase(info->second.key()); + info->second.priority = new_priority; + job->priority.store(new_priority); // Set user-facing priority (may affect executing jobs) + if (info->second.ready_seqno) + { + NOEXCEPT_SCOPE({ + 
ALLOW_ALLOCATIONS_IN_SCOPE; + ready_queue.emplace(info->second.key(), job); + }); + } + + // Recurse into dependencies + for (const auto & dep : job->dependencies) + prioritize(dep, new_priority, lock); + } + } + + void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock) + { + chassert(info.dependencies_left == 0); + chassert(info.ready_seqno == 0); + info.ready_seqno = ++last_ready_seqno; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + ready_queue.emplace(info.key(), job); + }); + + if (is_running && workers < max_threads) // TODO(serxa): Can we make max_thread changeable in runtime + spawn(lock); + } + + void spawn(std::unique_lock &) + { + // TODO(serxa): add metrics for number of active workers or executing jobs + workers++; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + pool.scheduleOrThrowOnError([this] { worker(); }); + }); + } + + void worker() + { + DENY_ALLOCATIONS_IN_SCOPE; + + LoadJobPtr job; + std::exception_ptr exception_from_job; + while (true) { // TODO(serxa): shutdown + + /// This is inside the loop to also reset previous thread names set inside the jobs + setThreadName("AsyncLoader"); + + { + std::unique_lock lock{mutex}; + + // Handle just executed job + if (exception_from_job) + failed(job, exception_from_job, lock); + else if (job) + loaded(job, lock); + + if (ready_queue.empty()) + { + workers--; + return; + } + + // Take next job to be executed from the ready queue + auto it = ready_queue.begin(); + job = it->second; + ready_queue.erase(it); + scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue + } + + try + { + ALLOW_ALLOCATIONS_IN_SCOPE; + job->func(*job); + exception_from_job = {}; + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + exception_from_job = std::make_exception_ptr( + Exception(ASYNC_LOAD_FAILED, + "Load job '{}' failed: {}", + job->name, + getCurrentExceptionMessage(/* with_stacktrace = */ true))); + }); + } + } + } + + std::mutex mutex; + bool is_running = false; + bool is_stopping = false; + + // Full set of scheduled pending jobs along with scheduling info + std::unordered_map scheduled_jobs; + + // Subset of scheduled pending jobs with resolved dependencies (waiting for a thread to be executed) + // Represent a queue of jobs in order of decreasing priority and FIFO for jobs with equal priorities + std::map ready_queue; + + // Set of finished jobs (for introspection only, until job is removed) + LoadJobSet finished_jobs; + + // Increasing counter for ReadyKey assignment (to preserve FIFO order of jobs with equal priority) + UInt64 last_ready_seqno = 0; + + // For executing jobs. Note that we avoid using an internal queue of the pool to be able to prioritize jobs + size_t max_threads; + size_t workers = 0; + ThreadPool pool; +}; + +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace CurrentMetrics +{ + extern const Metric TablesLoaderThreads; + extern const Metric TablesLoaderThreadsActive; +} + +namespace DB +{ + +void test() +{ + AsyncLoader loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, 2); + + auto job_func = [] (const LoadJob & self) { + std::cout << self.name << " done with priority " << self.priority << std::endl; + }; + + auto job1 = makeLoadJob({}, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task1 = loader.schedule({ job1, job2 }); + + auto job3 = makeLoadJob({ job2 }, "job3", job_func); + auto job4 = makeLoadJob({ job2 }, "job4", job_func); + auto task2 = loader.schedule({ job3, job4 }); + auto job5 = makeLoadJob({ job3, 
job4 }, "job5", job_func); + task2.merge(loader.schedule({ job5 }, -1)); +} + +} diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 9abf3bba8ff..fe99006eff4 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -650,6 +650,9 @@ M(679, IO_URING_SUBMIT_ERROR) \ M(690, MIXED_ACCESS_PARAMETER_TYPES) \ M(691, UNKNOWN_ELEMENT_OF_ENUM) \ + M(692, ASYNC_LOAD_FAILED) \ + M(693, ASYNC_LOAD_CANCELED) \ + M(694, ASYNC_LOAD_DEPENDENCY_FAILED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ From 5d11706929e38ff6b2aa84161e709d43828cb4db Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 10 Apr 2023 23:14:20 +0000 Subject: [PATCH 026/689] make t work, add simple test --- src/Common/AsyncLoader.h | 87 ++++++++++--------------- src/Common/tests/gtest_async_loader.cpp | 66 +++++++++++++++++++ 2 files changed, 102 insertions(+), 51 deletions(-) create mode 100644 src/Common/tests/gtest_async_loader.cpp diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index dd35621443d..944251653e4 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -95,7 +95,7 @@ private: std::function func; - std::mutex mutex; + mutable std::mutex mutex; std::condition_variable finished; bool is_finished = false; std::exception_ptr exception; @@ -129,7 +129,7 @@ private: ssize_t priority = 0; size_t dependencies_left = 0; UInt64 ready_seqno = 0; // zero means that job is not in ready queue - LoadJobSet dependent_jobs; // TODO(serxa): clean it on remove jobs + LoadJobSet dependent_jobs; ReadyKey key() const { @@ -206,6 +206,7 @@ public: , pool(metric_threads, metric_active_threads, max_threads) {} + // Start workers to execute scheduled load jobs void start() { std::unique_lock lock{mutex}; @@ -214,10 +215,24 @@ public: spawn(lock); } + // Wait for all load jobs to finish, including all new jobs. So at first take care to stop adding new jobs. 
+ void wait() + { + pool.wait(); + } + + // Wait for currently executing jobs to finish, but do not run any other pending jobs. + // Not finished jobs are left in pending state: + // - they can be resumed by calling start() again; + // - or canceled using ~Task() or remove() later. void stop() { - std::unique_lock lock{mutex}; - is_stopping = true; + { + std::unique_lock lock{mutex}; + is_running = false; + // NOTE: there is no need to notify because workers never wait + } + pool.wait(); } [[nodiscard]] Task schedule(LoadJobSet && jobs, ssize_t priority = 0) @@ -244,7 +259,7 @@ public: NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; scheduled_jobs.emplace(job, Info{.priority = priority}); - )}; + }); job->priority.store(priority); // Set user-facing priority } @@ -302,12 +317,12 @@ public: else if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) { if (info->second.dependencies_left > 0) // Job is not ready yet - canceled(job); + canceled(job, lock); else if (info->second.ready_seqno) // Job is enqueued in ready queue { ready_queue.erase(info->second.key()); info->second.ready_seqno = 0; - canceled(job); + canceled(job, lock); } else // Job is currently executing { @@ -321,16 +336,16 @@ public: } private: - void canceled(const LoadJobPtr & job, std::unique_lock &) + void canceled(const LoadJobPtr & job, std::unique_lock & lock) { std::exception_ptr e; NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; e = std::make_exception_ptr( - Exception(ASYNC_LOAD_CANCELED, + Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name)); - )}; + }); failed(job, e, lock); } @@ -349,7 +364,7 @@ private: enqueue(dep_info, dep, lock); } - finish(job); + finish(job, lock); } void failed(const LoadJobPtr & job, std::exception_ptr exception_from_job, std::unique_lock & lock) @@ -368,7 +383,7 @@ private: NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; e = std::make_exception_ptr( - Exception(ASYNC_LOAD_DEPENDENCY_FAILED, + 
Exception(ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED, "Load job '{}' -> {}", dep->name, getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); @@ -378,11 +393,11 @@ private: // Clean dependency graph edges for (const auto & dep : job->dependencies) - if (auto dep_info = scheduled_jobs.find(dep); info != scheduled_jobs.end()) + if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) dep_info->second.dependent_jobs.erase(job); // Job became finished - finish(job); + finish(job, lock); } void finish(const LoadJobPtr & job, std::unique_lock &) @@ -391,7 +406,7 @@ private: NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; finished_jobs.insert(job); - )}; + }); } void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock & lock) @@ -430,7 +445,7 @@ private: ready_queue.emplace(info.key(), job); }); - if (is_running && workers < max_threads) // TODO(serxa): Can we make max_thread changeable in runtime + if (is_running && workers < max_threads) // TODO(serxa): Can we make max_thread changeable in runtime? 
spawn(lock); } @@ -450,7 +465,7 @@ private: LoadJobPtr job; std::exception_ptr exception_from_job; - while (true) { // TODO(serxa): shutdown + while (true) { /// This is inside the loop to also reset previous thread names set inside the jobs setThreadName("AsyncLoader"); @@ -464,6 +479,9 @@ private: else if (job) loaded(job, lock); + if (!is_running) + return; + if (ready_queue.empty()) { workers--; @@ -489,7 +507,7 @@ private: NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; exception_from_job = std::make_exception_ptr( - Exception(ASYNC_LOAD_FAILED, + Exception(ErrorCodes::ASYNC_LOAD_FAILED, "Load job '{}' failed: {}", job->name, getCurrentExceptionMessage(/* with_stacktrace = */ true))); @@ -500,7 +518,6 @@ private: std::mutex mutex; bool is_running = false; - bool is_stopping = false; // Full set of scheduled pending jobs along with scheduling info std::unordered_map scheduled_jobs; @@ -522,35 +539,3 @@ private: }; } - -//////////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace CurrentMetrics -{ - extern const Metric TablesLoaderThreads; - extern const Metric TablesLoaderThreadsActive; -} - -namespace DB -{ - -void test() -{ - AsyncLoader loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, 2); - - auto job_func = [] (const LoadJob & self) { - std::cout << self.name << " done with priority " << self.priority << std::endl; - }; - - auto job1 = makeLoadJob({}, "job1", job_func); - auto job2 = makeLoadJob({ job1 }, "job2", job_func); - auto task1 = loader.schedule({ job1, job2 }); - - auto job3 = makeLoadJob({ job2 }, "job3", job_func); - auto job4 = makeLoadJob({ job2 }, "job4", job_func); - auto task2 = loader.schedule({ job3, job4 }); - auto job5 = makeLoadJob({ job3, job4 }, "job5", job_func); - task2.merge(loader.schedule({ job5 }, -1)); -} - -} diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp new file mode 100644 index 
00000000000..f8d51c0eebd --- /dev/null +++ b/src/Common/tests/gtest_async_loader.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include +#include + +#include +#include +#include +#include + + +using namespace DB; + + +namespace CurrentMetrics +{ + extern const Metric TablesLoaderThreads; + extern const Metric TablesLoaderThreadsActive; +} + +struct AsyncLoaderTest +{ + AsyncLoader loader; + + explicit AsyncLoaderTest(size_t max_threads = 1) + : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads) + {} +}; + +TEST(AsyncLoader, Smoke) +{ + AsyncLoaderTest t(2); + + static constexpr ssize_t low_priority = -1; + + std::atomic jobs_done{0}; + std::atomic low_priority_jobs_done{0}; + + auto job_func = [&] (const LoadJob & self) { + jobs_done++; + if (self.priority == low_priority) + low_priority_jobs_done++; + }; + + { + auto job1 = makeLoadJob({}, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task1 = t.loader.schedule({ job1, job2 }); + + auto job3 = makeLoadJob({ job2 }, "job3", job_func); + auto job4 = makeLoadJob({ job2 }, "job4", job_func); + auto task2 = t.loader.schedule({ job3, job4 }); + auto job5 = makeLoadJob({ job3, job4 }, "job5", job_func); + task2.merge(t.loader.schedule({ job5 }, low_priority)); + + t.loader.start(); + + t.loader.wait(); + } + + ASSERT_EQ(jobs_done, 5); + ASSERT_EQ(low_priority_jobs_done, 1); + + t.loader.stop(); +} From b4318b4ab630138ad187ef3ccf4e26887252954c Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 11 Apr 2023 00:08:55 +0000 Subject: [PATCH 027/689] fix style --- src/Common/AsyncLoader.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 944251653e4..8b4fa0bde26 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -112,7 +112,8 @@ class AsyncLoader : private boost::noncopyable { private: // Key of a pending job in ready queue - struct ReadyKey { + 
struct ReadyKey + { ssize_t priority; UInt64 ready_seqno; @@ -125,7 +126,8 @@ private: }; // Scheduling information for a pending job - struct Info { + struct Info + { ssize_t priority = 0; size_t dependencies_left = 0; UInt64 ready_seqno = 0; // zero means that job is not in ready queue @@ -465,8 +467,8 @@ private: LoadJobPtr job; std::exception_ptr exception_from_job; - while (true) { - + while (true) + { /// This is inside the loop to also reset previous thread names set inside the jobs setThreadName("AsyncLoader"); From 2c29c0b2f307939083c1197c86b9ac58e59ec7e4 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 13 Apr 2023 18:43:58 +0000 Subject: [PATCH 028/689] add dependency cycle detection --- src/Common/AsyncLoader.h | 49 ++++++++++++++++++++++--- src/Common/ErrorCodes.cpp | 7 ++-- src/Common/tests/gtest_async_loader.cpp | 44 ++++++++++++++++++++++ 3 files changed, 91 insertions(+), 9 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 8b4fa0bde26..6ccf3552721 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -28,7 +28,7 @@ class AsyncLoader; namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int ASYNC_LOAD_SCHEDULE_FAILED; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; extern const int ASYNC_LOAD_DEPENDENCY_FAILED; @@ -71,7 +71,7 @@ public: std::rethrow_exception(exception); } - const LoadJobSet dependencies; // jobs to be done before this one, with ownership + const LoadJobSet dependencies; // jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard const String name; std::atomic priority{0}; @@ -245,12 +245,13 @@ public: for (const auto & job : jobs) { if (job->status() != LoadStatus::PENDING) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to schedule already finished load job '{}'", job->name); + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Trying to schedule already finished load job '{}'", 
job->name); if (scheduled_jobs.contains(job)) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' has been already scheduled", job->name); + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job '{}' has been already scheduled", job->name); } - // TODO(serxa): ensure scheduled_jobs graph will have no cycles, otherwise we have infinite recursion and other strange stuff? + // Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones. + checkCycle(jobs, lock); // We do not want any exception to be throws after this point, because the following code is not exception-safe DENY_ALLOCATIONS_IN_SCOPE; @@ -338,6 +339,41 @@ public: } private: + void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock) + { + LoadJobSet left = jobs; + LoadJobSet visited; + visited.reserve(left.size()); + while (!left.empty()) + { + LoadJobPtr job = *left.begin(); + checkCycleImpl(job, left, visited, lock); + } + } + + String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock & lock) + { + if (!left.contains(job)) + return {}; // Do not consider external dependencies and already processed jobs + if (auto [_, inserted] = visited.insert(job); !inserted) + { + visited.erase(job); // Mark where cycle ends + return job->name; + } + for (const auto & dep : job->dependencies) + { + if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty()) + { + if (!visited.contains(job)) // Check for cycle end + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job dependency cycle detected: {} -> {}", job->name, chain); + else + return fmt::format("{} -> {}", job->name, chain); // chain is not a cycle yet -- continue building + } + } + left.erase(job); + return {}; + } + void canceled(const LoadJobPtr & job, std::unique_lock & lock) { std::exception_ptr e; @@ -453,7 +489,6 @@ private: void spawn(std::unique_lock &) { - // 
TODO(serxa): add metrics for number of active workers or executing jobs workers++; NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; @@ -521,6 +556,8 @@ private: std::mutex mutex; bool is_running = false; + // TODO(serxa): add metrics for number of jobs in every state + // Full set of scheduled pending jobs along with scheduling info std::unordered_map scheduled_jobs; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index fe99006eff4..0d203fc799c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -650,9 +650,10 @@ M(679, IO_URING_SUBMIT_ERROR) \ M(690, MIXED_ACCESS_PARAMETER_TYPES) \ M(691, UNKNOWN_ELEMENT_OF_ENUM) \ - M(692, ASYNC_LOAD_FAILED) \ - M(693, ASYNC_LOAD_CANCELED) \ - M(694, ASYNC_LOAD_DEPENDENCY_FAILED) \ + M(692, ASYNC_LOAD_SCHEDULE_FAILED) \ + M(693, ASYNC_LOAD_FAILED) \ + M(694, ASYNC_LOAD_CANCELED) \ + M(695, ASYNC_LOAD_DEPENDENCY_FAILED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index f8d51c0eebd..44c97bf0158 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -64,3 +64,47 @@ TEST(AsyncLoader, Smoke) t.loader.stop(); } + +TEST(AsyncLoader, CycleDetection) +{ + AsyncLoaderTest t; + + auto job_func = [&] (const LoadJob &) {}; + + LoadJobPtr cycle_breaker; // To avoid memleak we introduce with a cycle + + try + { + std::vector jobs; + jobs.push_back(makeLoadJob({}, "job0", job_func)); + jobs.push_back(makeLoadJob({ jobs[0] }, "job1", job_func)); + jobs.push_back(makeLoadJob({ jobs[0], jobs[1] }, "job2", job_func)); + jobs.push_back(makeLoadJob({ jobs[0], jobs[2] }, "job3", job_func)); + + // Actually it is hard to construct a cycle, but suppose someone was able to succeed violating constness + const_cast(jobs[1]->dependencies).insert(jobs[3]); + cycle_breaker = jobs[1]; + + // Add couple unrelated jobs + jobs.push_back(makeLoadJob({ jobs[1] }, "job4", 
job_func)); + jobs.push_back(makeLoadJob({ jobs[4] }, "job5", job_func)); + jobs.push_back(makeLoadJob({ jobs[3] }, "job6", job_func)); + jobs.push_back(makeLoadJob({ jobs[1], jobs[2], jobs[3], jobs[4], jobs[5], jobs[6] }, "job7", job_func)); + + // Also add another not connected jobs + jobs.push_back(makeLoadJob({}, "job8", job_func)); + jobs.push_back(makeLoadJob({}, "job9", job_func)); + jobs.push_back(makeLoadJob({ jobs[9] }, "job10", job_func)); + + auto task1 = t.loader.schedule({ jobs.begin(), jobs.end()}); + FAIL(); + } + catch (Exception & e) + { + int present[] = { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0; i < std::size(present); i++) + ASSERT_EQ(e.message().find(fmt::format("job{}", i)) != String::npos, present[i]); + } + + const_cast(cycle_breaker->dependencies).clear(); +} From 3226eadd947a232a6d2f617d3ac9dfe35b888b7c Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 13 Apr 2023 22:48:46 +0000 Subject: [PATCH 029/689] add simple randomized test --- src/Common/AsyncLoader.h | 6 +++ src/Common/tests/gtest_async_loader.cpp | 57 +++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 6ccf3552721..49cecdbbb8e 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -208,6 +208,12 @@ public: , pool(metric_threads, metric_active_threads, max_threads) {} + // WARNING: all Task instances returned by `schedule()` should be destructed before AsyncLoader + ~AsyncLoader() + { + stop(); + } + // Start workers to execute scheduled load jobs void start() { diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 44c97bf0158..3825f5d2592 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -23,6 +25,40 @@ struct AsyncLoaderTest { AsyncLoader loader; + std::mutex rng_mutex; + pcg64 rng{randomSeed()}; + + 
template + T randomInt(T from, T to) + { + std::uniform_int_distribution distribution(from, to); + std::scoped_lock lock(rng_mutex); + return distribution(rng); + } + + void randomSleepUs(UInt64 min_us, UInt64 max_us, int probability_percent) + { + if (randomInt(0, 99) < probability_percent) + std::this_thread::sleep_for(std::chrono::microseconds(randomInt(min_us, max_us))); + } + + template + LoadJobSet randomJobSet(int job_count, int dep_probability_percent, JobFunc job_func) + { + std::vector jobs; + for (int j = 0; j < job_count; j++) + { + LoadJobSet deps; + for (int d = 0; d < j; d++) + { + if (randomInt(0, 99) < dep_probability_percent) + deps.insert(jobs[d]); + } + jobs.push_back(makeLoadJob(std::move(deps), fmt::format("job{}", j), job_func)); + } + return {jobs.begin(), jobs.end()}; + } + explicit AsyncLoaderTest(size_t max_threads = 1) : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads) {} @@ -108,3 +144,24 @@ TEST(AsyncLoader, CycleDetection) const_cast(cycle_breaker->dependencies).clear(); } + +TEST(AsyncLoader, RandomTasks) +{ + AsyncLoaderTest t(16); + t.loader.start(); + + auto job_func = [&] (const LoadJob &) + { + t.randomSleepUs(100, 500, 5); + }; + + std::vector tasks; + for (int i = 0; i < 512; i++) + { + int job_count = t.randomInt(1, 32); + tasks.push_back(t.loader.schedule(t.randomJobSet(job_count, 5, job_func))); + t.randomSleepUs(100, 900, 20); // avg=100us + } + t.loader.wait(); +} + From 5e5a38cd640e71c764afdf7d347d466641144a3f Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 13 Apr 2023 23:18:38 +0000 Subject: [PATCH 030/689] add tests for job status/wait/cancel --- src/Common/AsyncLoader.h | 15 +++++----- src/Common/tests/gtest_async_loader.cpp | 40 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 49cecdbbb8e..5f4c67ab065 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h 
@@ -58,12 +58,6 @@ public: } void wait() - { - std::unique_lock lock{mutex}; - finished.wait(lock, [this] { return is_finished; }); - } - - void get() { std::unique_lock lock{mutex}; finished.wait(lock, [this] { return is_finished; }); @@ -186,9 +180,16 @@ public: ~Task() { - // Remove jobs that are not ready and wait for jobs that are in progress + remove(); + } + + void remove() + { if (loader) + { loader->remove(jobs); + detach(); + } } // Do not track jobs in this task diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 3825f5d2592..7e76ac0e07a 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -21,6 +21,14 @@ namespace CurrentMetrics extern const Metric TablesLoaderThreadsActive; } +namespace DB::ErrorCodes +{ + extern const int ASYNC_LOAD_SCHEDULE_FAILED; + extern const int ASYNC_LOAD_FAILED; + extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_DEPENDENCY_FAILED; +} + struct AsyncLoaderTest { AsyncLoader loader; @@ -90,9 +98,18 @@ TEST(AsyncLoader, Smoke) auto job5 = makeLoadJob({ job3, job4 }, "job5", job_func); task2.merge(t.loader.schedule({ job5 }, low_priority)); + std::thread waiter_thread([=] { job5->wait(); }); + t.loader.start(); + job3->wait(); t.loader.wait(); + job4->wait(); + + waiter_thread.join(); + + ASSERT_EQ(job1->status(), LoadStatus::SUCCESS); + ASSERT_EQ(job2->status(), LoadStatus::SUCCESS); } ASSERT_EQ(jobs_done, 5); @@ -145,6 +162,29 @@ TEST(AsyncLoader, CycleDetection) const_cast(cycle_breaker->dependencies).clear(); } +TEST(AsyncLoader, CancelPendingJob) +{ + AsyncLoaderTest t; + + auto job_func = [&] (const LoadJob &) {}; + + auto job = makeLoadJob({}, "job", job_func); + auto task = t.loader.schedule({job}); + + task.remove(); // this cancels pending the job (async loader was not started to execute it) + + ASSERT_EQ(job->status(), LoadStatus::FAILED); + try + { + job->wait(); + FAIL(); + } + catch (Exception & e) + { 
+ ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + } +} + TEST(AsyncLoader, RandomTasks) { AsyncLoaderTest t(16); From a9c51c4acabcbe4955731d279831dc38f98b4fcf Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 00:04:31 +0000 Subject: [PATCH 031/689] add more tests for cancel --- src/Common/AsyncLoader.h | 29 +++++-- src/Common/tests/gtest_async_loader.cpp | 109 +++++++++++++++++++++++- 2 files changed, 132 insertions(+), 6 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 5f4c67ab065..7bce762bb39 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -57,14 +57,30 @@ public: return !is_finished ? LoadStatus::PENDING : (exception ? LoadStatus::FAILED : LoadStatus::SUCCESS); } - void wait() + void wait() const { std::unique_lock lock{mutex}; + waiters++; finished.wait(lock, [this] { return is_finished; }); + waiters--; if (exception) std::rethrow_exception(exception); } + void waitNoThrow() const + { + std::unique_lock lock{mutex}; + waiters++; + finished.wait(lock, [this] { return is_finished; }); + waiters--; + } + + size_t waiters_count() const + { + std::unique_lock lock{mutex}; + return waiters; + } + const LoadJobSet dependencies; // jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard const String name; std::atomic priority{0}; @@ -76,7 +92,8 @@ private: { std::unique_lock lock{mutex}; is_finished = true; - finished.notify_all(); + if (waiters > 0) + finished.notify_all(); } void setFailure(const std::exception_ptr & ptr) @@ -84,13 +101,15 @@ private: std::unique_lock lock{mutex}; is_finished = true; exception = ptr; - finished.notify_all(); + if (waiters > 0) + finished.notify_all(); } std::function func; mutable std::mutex mutex; - std::condition_variable finished; + mutable std::condition_variable finished; + mutable size_t waiters = 0; bool is_finished = false; std::exception_ptr exception; }; @@ -337,7 +356,7 @@ public: else // Job is currently 
executing { lock.unlock(); - job->wait(); // Wait for job to finish + job->waitNoThrow(); // Wait for job to finish lock.lock(); } finished_jobs.erase(job); diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 7e76ac0e07a..7ef831cbfd0 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -169,7 +170,7 @@ TEST(AsyncLoader, CancelPendingJob) auto job_func = [&] (const LoadJob &) {}; auto job = makeLoadJob({}, "job", job_func); - auto task = t.loader.schedule({job}); + auto task = t.loader.schedule({ job }); task.remove(); // this cancels pending the job (async loader was not started to execute it) @@ -185,6 +186,112 @@ TEST(AsyncLoader, CancelPendingJob) } } +TEST(AsyncLoader, CancelPendingTask) +{ + AsyncLoaderTest t; + + auto job_func = [&] (const LoadJob &) {}; + + auto job1 = makeLoadJob({}, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task = t.loader.schedule({ job1, job2 }); + + task.remove(); // this cancels both jobs (async loader was not started to execute it) + + ASSERT_EQ(job1->status(), LoadStatus::FAILED); + ASSERT_EQ(job2->status(), LoadStatus::FAILED); + + try + { + job1->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + } + + try + { + job2->wait(); + FAIL(); + } + catch (Exception & e) + { + // Result depend on non-deterministic cancel order + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED || e.code() == ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED); + } +} + +TEST(AsyncLoader, CancelPendingDependency) +{ + AsyncLoaderTest t; + + auto job_func = [&] (const LoadJob &) {}; + + auto job1 = makeLoadJob({}, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task1 = t.loader.schedule({ job1 }); + auto task2 = t.loader.schedule({ job2 }); + + task1.remove(); // this 
cancels both jobs, due to dependency (async loader was not started to execute it) + + ASSERT_EQ(job1->status(), LoadStatus::FAILED); + ASSERT_EQ(job2->status(), LoadStatus::FAILED); + + try + { + job1->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + } + + try + { + job2->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED); + } +} + +TEST(AsyncLoader, CancelExecutingJob) +{ + AsyncLoaderTest t; + t.loader.start(); + + std::barrier sync(2); + + auto job_func = [&] (const LoadJob &) + { + sync.arrive_and_wait(); // (A) sync with main thread + sync.arrive_and_wait(); // (B) wait for waiter + // signals (C) + }; + + auto job = makeLoadJob({}, "job", job_func); + auto task = t.loader.schedule({ job }); + + sync.arrive_and_wait(); // (A) wait for job to start executing + std::thread canceler([&] + { + task.remove(); // waits for (C) + }); + while (job->waiters_count() == 0) + std::this_thread::yield(); + ASSERT_EQ(job->status(), LoadStatus::PENDING); + sync.arrive_and_wait(); // (B) sync with job + canceler.join(); + + ASSERT_EQ(job->status(), LoadStatus::SUCCESS); + job->wait(); +} + TEST(AsyncLoader, RandomTasks) { AsyncLoaderTest t(16); From 3be06454134c01282e0c6b4bf58459589de7f9c6 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 01:21:25 +0000 Subject: [PATCH 032/689] fix remove of executing tasks + add test --- src/Common/AsyncLoader.h | 33 +++++++++------ src/Common/tests/gtest_async_loader.cpp | 56 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 7bce762bb39..999559169ab 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -337,13 +337,12 @@ public: { DENY_ALLOCATIONS_IN_SCOPE; std::unique_lock lock{mutex}; + // On the first pass: + // - cancel all not executing jobs to avoid races + // - do not wait executing 
jobs (otherwise, on unlock a worker could start executing a dependent job, that should be canceled) for (const auto & job : jobs) { - if (auto it = finished_jobs.find(job); it != finished_jobs.end()) // Job is already finished - { - finished_jobs.erase(it); - } - else if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) { if (info->second.dependencies_left > 0) // Job is not ready yet canceled(job, lock); @@ -353,15 +352,25 @@ public: info->second.ready_seqno = 0; canceled(job, lock); } - else // Job is currently executing - { - lock.unlock(); - job->waitNoThrow(); // Wait for job to finish - lock.lock(); - } - finished_jobs.erase(job); } } + // On the second pass wait for executing jobs to finish + for (const auto & job : jobs) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + // Job is currently executing + chassert(info->second.dependencies_left == 0); + chassert(info->second.ready_seqno == 0); + lock.unlock(); + job->waitNoThrow(); // Wait for job to finish + lock.lock(); + } + } + // On the third pass all jobs are finished - remove them all + // It is better to do it under one lock to avoid exposing intermediate states + for (const auto & job : jobs) + finished_jobs.erase(job); } private: diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 7ef831cbfd0..2ba27264460 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -292,6 +293,61 @@ TEST(AsyncLoader, CancelExecutingJob) job->wait(); } +TEST(AsyncLoader, CancelExecutingTask) +{ + AsyncLoaderTest t(16); + t.loader.start(); + std::barrier sync(2); + + auto blocker_job_func = [&] (const LoadJob &) + { + sync.arrive_and_wait(); // (A) sync with main thread + sync.arrive_and_wait(); // (B) wait for waiter + // signals (C) + }; + + 
auto job_to_cancel_func = [&] (const LoadJob &) + { + FAIL(); // this job should be canceled + }; + + auto job_to_succeed_func = [&] (const LoadJob &) + { + }; + + // Make several iterations to catch the race (if any) + for (int iteration = 0; iteration < 10; iteration++) { + std::vector task1_jobs; + auto blocker_job = makeLoadJob({}, "blocker_job", blocker_job_func); + task1_jobs.push_back(blocker_job); + for (int i = 0; i < 100; i++) + task1_jobs.push_back(makeLoadJob({ blocker_job }, "job_to_cancel", job_to_cancel_func)); + auto task1 = t.loader.schedule({ task1_jobs.begin(), task1_jobs.end() }); + auto job_to_succeed = makeLoadJob({ blocker_job }, "job_to_succeed", job_to_succeed_func); + auto task2 = t.loader.schedule({ job_to_succeed }); + + sync.arrive_and_wait(); // (A) wait for job to start executing + std::thread canceler([&] + { + task1.remove(); // waits for (C) + }); + while (blocker_job->waiters_count() == 0) + std::this_thread::yield(); + ASSERT_EQ(blocker_job->status(), LoadStatus::PENDING); + sync.arrive_and_wait(); // (B) sync with job + canceler.join(); + t.loader.wait(); + + ASSERT_EQ(blocker_job->status(), LoadStatus::SUCCESS); + ASSERT_EQ(job_to_succeed->status(), LoadStatus::SUCCESS); + for (const auto & job : task1_jobs) + { + if (job != blocker_job) + ASSERT_EQ(job->status(), LoadStatus::FAILED); + } + } +} + TEST(AsyncLoader, RandomTasks) { AsyncLoaderTest t(16); From ba67f95525cc2613269a9b39c617022f248b80c5 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 01:23:40 +0000 Subject: [PATCH 033/689] allow cancels on RandomTasks test shutdown --- src/Common/tests/gtest_async_loader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 2ba27264460..ae0929e6e99 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -365,6 +365,5 @@ TEST(AsyncLoader, RandomTasks) 
tasks.push_back(t.loader.schedule(t.randomJobSet(job_count, 5, job_func))); t.randomSleepUs(100, 900, 20); // avg=100us } - t.loader.wait(); } From 4bc52a1b06976912c8cacee948b1ebebc8cbb9f4 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 12:46:09 +0000 Subject: [PATCH 034/689] add concurrency test --- src/Common/tests/gtest_async_loader.cpp | 44 ++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index ae0929e6e99..4a86938817c 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -38,6 +38,10 @@ struct AsyncLoaderTest std::mutex rng_mutex; pcg64 rng{randomSeed()}; + explicit AsyncLoaderTest(size_t max_threads = 1) + : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads) + {} + template T randomInt(T from, T to) { @@ -53,7 +57,7 @@ struct AsyncLoaderTest } template - LoadJobSet randomJobSet(int job_count, int dep_probability_percent, JobFunc job_func) + LoadJobSet randomJobSet(int job_count, int dep_probability_percent, JobFunc job_func, std::string_view name_prefix = "job") { std::vector jobs; for (int j = 0; j < job_count; j++) @@ -64,14 +68,20 @@ struct AsyncLoaderTest if (randomInt(0, 99) < dep_probability_percent) deps.insert(jobs[d]); } - jobs.push_back(makeLoadJob(std::move(deps), fmt::format("job{}", j), job_func)); + jobs.push_back(makeLoadJob(std::move(deps), fmt::format("{}{}", name_prefix, j), job_func)); } return {jobs.begin(), jobs.end()}; } - explicit AsyncLoaderTest(size_t max_threads = 1) - : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads) - {} + template + LoadJobSet chainJobSet(int job_count, JobFunc job_func, std::string_view name_prefix = "job") + { + std::vector jobs; + jobs.push_back(makeLoadJob({}, fmt::format("{}{}", name_prefix, 0), job_func)); + for (int j = 1; j < 
job_count; j++) + jobs.push_back(makeLoadJob({ jobs[j - 1] }, fmt::format("{}{}", name_prefix, j), job_func)); + return {jobs.begin(), jobs.end()}; + } }; TEST(AsyncLoader, Smoke) @@ -367,3 +377,27 @@ TEST(AsyncLoader, RandomTasks) } } +TEST(AsyncLoader, TestConcurrency) +{ + AsyncLoaderTest t(10); + t.loader.start(); + + for (int concurrency = 1; concurrency <= 10; concurrency++) + { + std::barrier sync(concurrency); + + std::atomic executing{0}; + auto job_func = [&] (const LoadJob &) + { + executing++; + ASSERT_LE(executing, concurrency); + sync.arrive_and_wait(); + executing--; + }; + + std::vector tasks; + for (int i = 0; i < concurrency; i++) + tasks.push_back(t.loader.schedule(t.chainJobSet(5, job_func))); + t.loader.wait(); + } +} From 9ac37ca3b85ac17906e2c5d8387ae3deb4f07f1c Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 12:57:51 +0000 Subject: [PATCH 035/689] add overload test --- src/Common/AsyncLoader.h | 8 ++++++- src/Common/tests/gtest_async_loader.cpp | 29 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 999559169ab..9467a705c10 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -373,6 +373,12 @@ public: finished_jobs.erase(job); } + size_t getMaxThreads() const + { + std::unique_lock lock{mutex}; + return pool.getMaxThreads(); + } + private: void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock) { @@ -588,7 +594,7 @@ private: } } - std::mutex mutex; + mutable std::mutex mutex; bool is_running = false; // TODO(serxa): add metrics for number of jobs in every state diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 4a86938817c..68de24460bb 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -399,5 +399,34 @@ TEST(AsyncLoader, TestConcurrency) for (int i = 0; i < concurrency; i++) 
tasks.push_back(t.loader.schedule(t.chainJobSet(5, job_func))); t.loader.wait(); + ASSERT_EQ(executing, 0); + } +} + +TEST(AsyncLoader, TestOverload) +{ + AsyncLoaderTest t(3); + t.loader.start(); + + size_t max_threads = t.loader.getMaxThreads(); + std::atomic executing{0}; + + for (int concurrency = 4; concurrency <= 8; concurrency++) + { + auto job_func = [&] (const LoadJob &) + { + executing++; + t.randomSleepUs(100, 200, 100); + ASSERT_LE(executing, max_threads); + executing--; + }; + + t.loader.stop(); + std::vector tasks; + for (int i = 0; i < concurrency; i++) + tasks.push_back(t.loader.schedule(t.chainJobSet(5, job_func))); + t.loader.start(); + t.loader.wait(); + ASSERT_EQ(executing, 0); } } From df3c5212ff24f091d157c10e1b04c25697f6a470 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 13:14:31 +0000 Subject: [PATCH 036/689] test deps are completed + refactoring --- src/Common/AsyncLoader.h | 6 ++- src/Common/tests/gtest_async_loader.cpp | 64 +++++++++++++------------ 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 9467a705c10..96ea6165d34 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -105,7 +105,9 @@ private: finished.notify_all(); } - std::function func; + // TODO(serxa): add callback/status for cancel? + + std::function func; mutable std::mutex mutex; mutable std::condition_variable finished; @@ -576,7 +578,7 @@ private: try { ALLOW_ALLOCATIONS_IN_SCOPE; - job->func(*job); + job->func(job); exception_from_job = {}; } catch (...) 
diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 68de24460bb..d913f8f6362 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -93,9 +93,9 @@ TEST(AsyncLoader, Smoke) std::atomic jobs_done{0}; std::atomic low_priority_jobs_done{0}; - auto job_func = [&] (const LoadJob & self) { + auto job_func = [&] (const LoadJobPtr & self) { jobs_done++; - if (self.priority == low_priority) + if (self->priority == low_priority) low_priority_jobs_done++; }; @@ -134,7 +134,7 @@ TEST(AsyncLoader, CycleDetection) { AsyncLoaderTest t; - auto job_func = [&] (const LoadJob &) {}; + auto job_func = [&] (const LoadJobPtr &) {}; LoadJobPtr cycle_breaker; // To avoid memleak we introduce with a cycle @@ -178,7 +178,7 @@ TEST(AsyncLoader, CancelPendingJob) { AsyncLoaderTest t; - auto job_func = [&] (const LoadJob &) {}; + auto job_func = [&] (const LoadJobPtr &) {}; auto job = makeLoadJob({}, "job", job_func); auto task = t.loader.schedule({ job }); @@ -201,7 +201,7 @@ TEST(AsyncLoader, CancelPendingTask) { AsyncLoaderTest t; - auto job_func = [&] (const LoadJob &) {}; + auto job_func = [&] (const LoadJobPtr &) {}; auto job1 = makeLoadJob({}, "job1", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func); @@ -238,7 +238,7 @@ TEST(AsyncLoader, CancelPendingDependency) { AsyncLoaderTest t; - auto job_func = [&] (const LoadJob &) {}; + auto job_func = [&] (const LoadJobPtr &) {}; auto job1 = makeLoadJob({}, "job1", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func); @@ -278,7 +278,7 @@ TEST(AsyncLoader, CancelExecutingJob) std::barrier sync(2); - auto job_func = [&] (const LoadJob &) + auto job_func = [&] (const LoadJobPtr &) { sync.arrive_and_wait(); // (A) sync with main thread sync.arrive_and_wait(); // (B) wait for waiter @@ -309,19 +309,19 @@ TEST(AsyncLoader, CancelExecutingTask) t.loader.start(); std::barrier sync(2); - auto blocker_job_func = [&] (const LoadJob &) + 
auto blocker_job_func = [&] (const LoadJobPtr &) { sync.arrive_and_wait(); // (A) sync with main thread sync.arrive_and_wait(); // (B) wait for waiter // signals (C) }; - auto job_to_cancel_func = [&] (const LoadJob &) + auto job_to_cancel_func = [&] (const LoadJobPtr &) { FAIL(); // this job should be canceled }; - auto job_to_succeed_func = [&] (const LoadJob &) + auto job_to_succeed_func = [&] (const LoadJobPtr &) { }; @@ -358,25 +358,6 @@ TEST(AsyncLoader, CancelExecutingTask) } } -TEST(AsyncLoader, RandomTasks) -{ - AsyncLoaderTest t(16); - t.loader.start(); - - auto job_func = [&] (const LoadJob &) - { - t.randomSleepUs(100, 500, 5); - }; - - std::vector tasks; - for (int i = 0; i < 512; i++) - { - int job_count = t.randomInt(1, 32); - tasks.push_back(t.loader.schedule(t.randomJobSet(job_count, 5, job_func))); - t.randomSleepUs(100, 900, 20); // avg=100us - } -} - TEST(AsyncLoader, TestConcurrency) { AsyncLoaderTest t(10); @@ -387,7 +368,7 @@ TEST(AsyncLoader, TestConcurrency) std::barrier sync(concurrency); std::atomic executing{0}; - auto job_func = [&] (const LoadJob &) + auto job_func = [&] (const LoadJobPtr &) { executing++; ASSERT_LE(executing, concurrency); @@ -413,7 +394,7 @@ TEST(AsyncLoader, TestOverload) for (int concurrency = 4; concurrency <= 8; concurrency++) { - auto job_func = [&] (const LoadJob &) + auto job_func = [&] (const LoadJobPtr &) { executing++; t.randomSleepUs(100, 200, 100); @@ -430,3 +411,24 @@ TEST(AsyncLoader, TestOverload) ASSERT_EQ(executing, 0); } } + +TEST(AsyncLoader, RandomTasks) +{ + AsyncLoaderTest t(16); + t.loader.start(); + + auto job_func = [&] (const LoadJobPtr & self) + { + for (const auto & dep : self->dependencies) + ASSERT_EQ(dep->status(), LoadStatus::SUCCESS); + t.randomSleepUs(100, 500, 5); + }; + + std::vector tasks; + for (int i = 0; i < 512; i++) + { + int job_count = t.randomInt(1, 32); + tasks.push_back(t.loader.schedule(t.randomJobSet(job_count, 5, job_func))); + t.randomSleepUs(100, 900, 20); // 
avg=100us + } +} From 5ba1dc91e333ae824f1f0197442948757e82651e Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 14:00:54 +0000 Subject: [PATCH 037/689] add LoadStatus::CANCELED --- src/Common/AsyncLoader.h | 96 ++++++++++++++----------- src/Common/tests/gtest_async_loader.cpp | 24 +++---- 2 files changed, 67 insertions(+), 53 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 96ea6165d34..6af1b79bec8 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -36,9 +36,10 @@ namespace ErrorCodes enum class LoadStatus { - PENDING, // Load is not finished yet - SUCCESS, // Load was successful - FAILED // Load failed or canceled + PENDING, // Load job is not started yet + OK, // Load job executed and was successful + FAILED, // Load job executed and failed + CANCELED // Load job is not going to be executed due to removal or dependency failure }; class LoadJob : private boost::noncopyable @@ -54,14 +55,14 @@ public: LoadStatus status() const { std::unique_lock lock{mutex}; - return !is_finished ? LoadStatus::PENDING : (exception ? 
LoadStatus::FAILED : LoadStatus::SUCCESS); + return load_status; } void wait() const { std::unique_lock lock{mutex}; waiters++; - finished.wait(lock, [this] { return is_finished; }); + finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; }); waiters--; if (exception) std::rethrow_exception(exception); @@ -71,7 +72,7 @@ public: { std::unique_lock lock{mutex}; waiters++; - finished.wait(lock, [this] { return is_finished; }); + finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; }); waiters--; } @@ -88,18 +89,27 @@ public: private: friend class AsyncLoader; - void setSuccess() + void ok() { std::unique_lock lock{mutex}; - is_finished = true; + load_status = LoadStatus::OK; if (waiters > 0) finished.notify_all(); } - void setFailure(const std::exception_ptr & ptr) + void failed(const std::exception_ptr & ptr) { std::unique_lock lock{mutex}; - is_finished = true; + load_status = LoadStatus::FAILED; + exception = ptr; + if (waiters > 0) + finished.notify_all(); + } + + void canceled(const std::exception_ptr & ptr) + { + std::unique_lock lock{mutex}; + load_status = LoadStatus::CANCELED; exception = ptr; if (waiters > 0) finished.notify_all(); @@ -112,7 +122,7 @@ private: mutable std::mutex mutex; mutable std::condition_variable finished; mutable size_t waiters = 0; - bool is_finished = false; + LoadStatus load_status{LoadStatus::PENDING}; std::exception_ptr exception; }; @@ -148,6 +158,12 @@ private: UInt64 ready_seqno = 0; // zero means that job is not in ready queue LoadJobSet dependent_jobs; + // Three independent states of a non-finished jobs + bool is_blocked() const { return dependencies_left > 0; } + bool is_ready() const { return dependencies_left == 0 && ready_seqno > 0; } + bool is_executing() const { return dependencies_left == 0 && ready_seqno == 0; } + + // Get key of a ready job ReadyKey key() const { return {priority, ready_seqno}; @@ -318,8 +334,8 @@ public: prioritize(dep, priority, lock); } - // Place jobs w/o 
dependencies to ready queue - if (info.dependencies_left == 0) + // Enqueue non-blocked jobs (w/o dependencies) to ready queue + if (!info.is_blocked()) enqueue(info, job, lock); } @@ -346,14 +362,22 @@ public: { if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) { - if (info->second.dependencies_left > 0) // Job is not ready yet - canceled(job, lock); - else if (info->second.ready_seqno) // Job is enqueued in ready queue + if (info->second.is_executing()) + continue; // Skip executing jobs on the first pass + if (info->second.is_ready()) { ready_queue.erase(info->second.key()); info->second.ready_seqno = 0; - canceled(job, lock); } + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr( + Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' canceled", + job->name)); + }); + markNotOk(job, e, LoadStatus::CANCELED, lock); } } // On the second pass wait for executing jobs to finish @@ -362,8 +386,7 @@ public: if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) { // Job is currently executing - chassert(info->second.dependencies_left == 0); - chassert(info->second.ready_seqno == 0); + chassert(info->second.is_executing()); lock.unlock(); job->waitNoThrow(); // Wait for job to finish lock.lock(); @@ -417,23 +440,10 @@ private: return {}; } - void canceled(const LoadJobPtr & job, std::unique_lock & lock) - { - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' canceled", - job->name)); - }); - failed(job, e, lock); - } - - void loaded(const LoadJobPtr & job, std::unique_lock & lock) + void markOk(const LoadJobPtr & job, std::unique_lock & lock) { // Notify waiters - job->setSuccess(); + job->ok(); // Update dependent jobs and enqueue if ready chassert(scheduled_jobs.contains(job)); // Job was pending @@ -441,17 +451,21 @@ private: { 
chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending Info & dep_info = scheduled_jobs[dep]; - if (--dep_info.dependencies_left == 0) + dep_info.dependencies_left--; + if (!dep_info.is_blocked()) enqueue(dep_info, dep, lock); } finish(job, lock); } - void failed(const LoadJobPtr & job, std::exception_ptr exception_from_job, std::unique_lock & lock) + void markNotOk(const LoadJobPtr & job, std::exception_ptr exception_from_job, LoadStatus status, std::unique_lock & lock) { // Notify waiters - job->setFailure(exception_from_job); + if (status == LoadStatus::FAILED) + job->failed(exception_from_job); + else if (status == LoadStatus::CANCELED) + job->canceled(exception_from_job); // Recurse into all dependent jobs chassert(scheduled_jobs.contains(job)); // Job was pending @@ -469,7 +483,7 @@ private: dep->name, getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); }); - failed(dep, e, lock); + markNotOk(dep, e, LoadStatus::CANCELED, lock); } // Clean dependency graph edges @@ -518,7 +532,7 @@ private: void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock) { - chassert(info.dependencies_left == 0); + chassert(!info.is_blocked()); chassert(info.ready_seqno == 0); info.ready_seqno = ++last_ready_seqno; NOEXCEPT_SCOPE({ @@ -555,9 +569,9 @@ private: // Handle just executed job if (exception_from_job) - failed(job, exception_from_job, lock); + markNotOk(job, exception_from_job, LoadStatus::FAILED, lock); else if (job) - loaded(job, lock); + markOk(job, lock); if (!is_running) return; diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index d913f8f6362..d7706311fa4 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -120,8 +120,8 @@ TEST(AsyncLoader, Smoke) waiter_thread.join(); - ASSERT_EQ(job1->status(), LoadStatus::SUCCESS); - ASSERT_EQ(job2->status(), LoadStatus::SUCCESS); + ASSERT_EQ(job1->status(), LoadStatus::OK); 
+ ASSERT_EQ(job2->status(), LoadStatus::OK); } ASSERT_EQ(jobs_done, 5); @@ -185,7 +185,7 @@ TEST(AsyncLoader, CancelPendingJob) task.remove(); // this cancels pending the job (async loader was not started to execute it) - ASSERT_EQ(job->status(), LoadStatus::FAILED); + ASSERT_EQ(job->status(), LoadStatus::CANCELED); try { job->wait(); @@ -209,8 +209,8 @@ TEST(AsyncLoader, CancelPendingTask) task.remove(); // this cancels both jobs (async loader was not started to execute it) - ASSERT_EQ(job1->status(), LoadStatus::FAILED); - ASSERT_EQ(job2->status(), LoadStatus::FAILED); + ASSERT_EQ(job1->status(), LoadStatus::CANCELED); + ASSERT_EQ(job2->status(), LoadStatus::CANCELED); try { @@ -247,8 +247,8 @@ TEST(AsyncLoader, CancelPendingDependency) task1.remove(); // this cancels both jobs, due to dependency (async loader was not started to execute it) - ASSERT_EQ(job1->status(), LoadStatus::FAILED); - ASSERT_EQ(job2->status(), LoadStatus::FAILED); + ASSERT_EQ(job1->status(), LoadStatus::CANCELED); + ASSERT_EQ(job2->status(), LoadStatus::CANCELED); try { @@ -299,7 +299,7 @@ TEST(AsyncLoader, CancelExecutingJob) sync.arrive_and_wait(); // (B) sync with job canceler.join(); - ASSERT_EQ(job->status(), LoadStatus::SUCCESS); + ASSERT_EQ(job->status(), LoadStatus::OK); job->wait(); } @@ -348,12 +348,12 @@ TEST(AsyncLoader, CancelExecutingTask) canceler.join(); t.loader.wait(); - ASSERT_EQ(blocker_job->status(), LoadStatus::SUCCESS); - ASSERT_EQ(job_to_succeed->status(), LoadStatus::SUCCESS); + ASSERT_EQ(blocker_job->status(), LoadStatus::OK); + ASSERT_EQ(job_to_succeed->status(), LoadStatus::OK); for (const auto & job : task1_jobs) { if (job != blocker_job) - ASSERT_EQ(job->status(), LoadStatus::FAILED); + ASSERT_EQ(job->status(), LoadStatus::CANCELED); } } } @@ -420,7 +420,7 @@ TEST(AsyncLoader, RandomTasks) auto job_func = [&] (const LoadJobPtr & self) { for (const auto & dep : self->dependencies) - ASSERT_EQ(dep->status(), LoadStatus::SUCCESS); + ASSERT_EQ(dep->status(), 
LoadStatus::OK); t.randomSleepUs(100, 500, 5); }; From 5837b09880365f88e433c6abb58964fe8e1f4624 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 17:16:30 +0000 Subject: [PATCH 038/689] fix scheduling of jobs with dependencies on finished jobs --- src/Common/AsyncLoader.h | 211 +++++++++++++++--------- src/Common/ErrorCodes.cpp | 1 - src/Common/tests/gtest_async_loader.cpp | 86 +++++++++- 3 files changed, 211 insertions(+), 87 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 6af1b79bec8..84b709f14a1 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -31,7 +31,6 @@ namespace ErrorCodes extern const int ASYNC_LOAD_SCHEDULE_FAILED; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; - extern const int ASYNC_LOAD_DEPENDENCY_FAILED; } enum class LoadStatus @@ -58,14 +57,20 @@ public: return load_status; } + std::exception_ptr exception() const + { + std::unique_lock lock{mutex}; + return load_exception; + } + void wait() const { std::unique_lock lock{mutex}; waiters++; finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; }); waiters--; - if (exception) - std::rethrow_exception(exception); + if (load_exception) + std::rethrow_exception(load_exception); } void waitNoThrow() const @@ -101,7 +106,7 @@ private: { std::unique_lock lock{mutex}; load_status = LoadStatus::FAILED; - exception = ptr; + load_exception = ptr; if (waiters > 0) finished.notify_all(); } @@ -110,7 +115,7 @@ private: { std::unique_lock lock{mutex}; load_status = LoadStatus::CANCELED; - exception = ptr; + load_exception = ptr; if (waiters > 0) finished.notify_all(); } @@ -123,7 +128,7 @@ private: mutable std::condition_variable finished; mutable size_t waiters = 0; LoadStatus load_status{LoadStatus::PENDING}; - std::exception_ptr exception; + std::exception_ptr load_exception; }; template @@ -241,8 +246,9 @@ public: LoadJobSet jobs; }; - AsyncLoader(Metric metric_threads, Metric 
metric_active_threads, size_t max_threads_) - : max_threads(max_threads_) + AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_) + : log_failures(log_failures_) + , max_threads(max_threads_) , pool(metric_threads, metric_active_threads, max_threads) {} @@ -310,7 +316,7 @@ public: job->priority.store(priority); // Set user-facing priority } - // Process incoming dependencies + // Process dependencies on scheduled pending jobs for (const auto & job : jobs) { Info & info = scheduled_jobs.find(job)->second; @@ -324,14 +330,10 @@ public: dep_info->second.dependent_jobs.insert(job); }); info.dependencies_left++; - } - else - { - // TODO(serxa): check status: (1) pending: it is wrong - throw? (2) success: good - no dep. (3) failed: propagate failure! - } - // Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion - prioritize(dep, priority, lock); + // Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion + prioritize(dep, priority, lock); + } } // Enqueue non-blocked jobs (w/o dependencies) to ready queue @@ -339,6 +341,57 @@ public: enqueue(info, job, lock); } + // Process dependencies on other jobs. It is done in a separate pass to facilitate propagation of cancel signals (if any). + for (const auto & job : jobs) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + for (const auto & dep : job->dependencies) + { + if (scheduled_jobs.contains(dep)) + continue; // Skip dependencies on scheduled pending jobs (already processed) + LoadStatus dep_status = dep->status(); + if (dep_status == LoadStatus::OK) + continue; // Dependency on already successfully finished job -- it's okay. + + if (dep_status == LoadStatus::PENDING) + { + // Dependency on not scheduled pending job -- it's bad. + // Probably, there is an error in `jobs` set: not all jobs were passed to `schedule()` call. 
+ // We are not going to run any dependent job, so cancel them all. + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> Load job '{}': not scheduled pending load job (it must be also scheduled), all dependent load jobs are canceled", + job->name, + dep->name)); + }); + finish(lock, job, LoadStatus::CANCELED, e); + break; // This job is now finished, stop its dependencies processing + } + if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED) + { + // Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs. + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> {}", + job->name, + getExceptionMessage(dep->exception(), /* with_stack_trace = */ false))); + }); + finish(lock, job, LoadStatus::CANCELED, e); + break; // This job is now finished, stop its dependencies processing + } + } + } + else + { + // Job was already canceled on previous iteration of this cycle -- skip + } + } + return Task(this, std::move(jobs)); } @@ -364,20 +417,12 @@ public: { if (info->second.is_executing()) continue; // Skip executing jobs on the first pass - if (info->second.is_ready()) - { - ready_queue.erase(info->second.key()); - info->second.ready_seqno = 0; - } std::exception_ptr e; NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' canceled", - job->name)); + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name)); }); - markNotOk(job, e, LoadStatus::CANCELED, lock); + finish(lock, job, LoadStatus::CANCELED, e); } } // On the second pass wait for executing jobs to finish @@ -440,63 +485,64 @@ private: return {}; } - void markOk(const LoadJobPtr & job, std::unique_lock & lock) + 
void finish(std::unique_lock & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job = {}) { - // Notify waiters - job->ok(); - - // Update dependent jobs and enqueue if ready - chassert(scheduled_jobs.contains(job)); // Job was pending - for (const auto & dep : scheduled_jobs[job].dependent_jobs) + if (status == LoadStatus::OK) { - chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending - Info & dep_info = scheduled_jobs[dep]; - dep_info.dependencies_left--; - if (!dep_info.is_blocked()) - enqueue(dep_info, dep, lock); + // Notify waiters + job->ok(); + + // Update dependent jobs and enqueue if ready + chassert(scheduled_jobs.contains(job)); // Job was pending + for (const auto & dep : scheduled_jobs[job].dependent_jobs) + { + chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending + Info & dep_info = scheduled_jobs[dep]; + dep_info.dependencies_left--; + if (!dep_info.is_blocked()) + enqueue(dep_info, dep, lock); + } } - - finish(job, lock); - } - - void markNotOk(const LoadJobPtr & job, std::exception_ptr exception_from_job, LoadStatus status, std::unique_lock & lock) - { - // Notify waiters - if (status == LoadStatus::FAILED) - job->failed(exception_from_job); - else if (status == LoadStatus::CANCELED) - job->canceled(exception_from_job); - - // Recurse into all dependent jobs - chassert(scheduled_jobs.contains(job)); // Job was pending - Info & info = scheduled_jobs[job]; - LoadJobSet dependent; - dependent.swap(info.dependent_jobs); // To avoid container modification during recursion - for (const auto & dep : dependent) + else { - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED, - "Load job '{}' -> {}", - dep->name, - getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); - }); - markNotOk(dep, e, LoadStatus::CANCELED, lock); - } + // Notify waiters + if 
(status == LoadStatus::FAILED) + job->failed(exception_from_job); + else if (status == LoadStatus::CANCELED) + job->canceled(exception_from_job); - // Clean dependency graph edges - for (const auto & dep : job->dependencies) - if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) - dep_info->second.dependent_jobs.erase(job); + chassert(scheduled_jobs.contains(job)); // Job was pending + Info & info = scheduled_jobs[job]; + if (info.is_ready()) + { + ready_queue.erase(info.key()); + info.ready_seqno = 0; + } + + // Recurse into all dependent jobs + LoadJobSet dependent; + dependent.swap(info.dependent_jobs); // To avoid container modification during recursion + for (const auto & dep : dependent) + { + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr( + Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> {}", + dep->name, + getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); + }); + finish(lock, dep, LoadStatus::CANCELED, e); + } + + // Clean dependency graph edges + for (const auto & dep : job->dependencies) + if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) + dep_info->second.dependent_jobs.erase(job); + } // Job became finished - finish(job, lock); - } - - void finish(const LoadJobPtr & job, std::unique_lock &) - { scheduled_jobs.erase(job); NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; @@ -569,9 +615,9 @@ private: // Handle just executed job if (exception_from_job) - markNotOk(job, exception_from_job, LoadStatus::FAILED, lock); + finish(lock, job, LoadStatus::FAILED, exception_from_job); else if (job) - markOk(job, lock); + finish(lock, job, LoadStatus::OK); if (!is_running) return; @@ -589,17 +635,18 @@ private: scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue } + ALLOW_ALLOCATIONS_IN_SCOPE; + try { - ALLOW_ALLOCATIONS_IN_SCOPE; job->func(job); exception_from_job = {}; } catch 
(...) { - tryLogCurrentException(__PRETTY_FUNCTION__); NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; + if (log_failures) + tryLogCurrentException(__PRETTY_FUNCTION__); exception_from_job = std::make_exception_ptr( Exception(ErrorCodes::ASYNC_LOAD_FAILED, "Load job '{}' failed: {}", @@ -610,6 +657,8 @@ private: } } + const bool log_failures; + mutable std::mutex mutex; bool is_running = false; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 0d203fc799c..64d07ccc9cf 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -653,7 +653,6 @@ M(692, ASYNC_LOAD_SCHEDULE_FAILED) \ M(693, ASYNC_LOAD_FAILED) \ M(694, ASYNC_LOAD_CANCELED) \ - M(695, ASYNC_LOAD_DEPENDENCY_FAILED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index d7706311fa4..5be715db2f6 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -28,7 +29,6 @@ namespace DB::ErrorCodes extern const int ASYNC_LOAD_SCHEDULE_FAILED; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; - extern const int ASYNC_LOAD_DEPENDENCY_FAILED; } struct AsyncLoaderTest @@ -39,7 +39,7 @@ struct AsyncLoaderTest pcg64 rng{randomSeed()}; explicit AsyncLoaderTest(size_t max_threads = 1) - : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads) + : loader(CurrentMetrics::TablesLoaderThreads, CurrentMetrics::TablesLoaderThreadsActive, max_threads, /* log_failures = */ false) {} template @@ -229,8 +229,7 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - // Result depend on non-deterministic cancel order - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED || e.code() == ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED); + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); } } @@ 
-267,7 +266,7 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_DEPENDENCY_FAILED); + ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); } } @@ -358,6 +357,83 @@ TEST(AsyncLoader, CancelExecutingTask) } } +TEST(AsyncLoader, JobFailure) +{ + AsyncLoaderTest t; + t.loader.start(); + + std::string_view error_message = "test job failure"; + + auto job_func = [&] (const LoadJobPtr &) { + throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); + }; + + auto job = makeLoadJob({}, "job", job_func); + auto task = t.loader.schedule({ job }); + + t.loader.wait(); + + ASSERT_EQ(job->status(), LoadStatus::FAILED); + try + { + job->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_FAILED); + ASSERT_TRUE(e.message().find(error_message) != String::npos); + } +} + +TEST(AsyncLoader, ScheduleJobWithFailedDependencies) +{ + AsyncLoaderTest t; + t.loader.start(); + + std::string_view error_message = "test job failure"; + + auto failed_job_func = [&] (const LoadJobPtr &) { + throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); + }; + + auto failed_job = makeLoadJob({}, "failed_job", failed_job_func); + auto failed_task = t.loader.schedule({ failed_job }); + + t.loader.wait(); + + auto job_func = [&] (const LoadJobPtr &) {}; + + auto job1 = makeLoadJob({ failed_job }, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task = t.loader.schedule({ job1, job2 }); + + t.loader.wait(); + + ASSERT_EQ(job1->status(), LoadStatus::CANCELED); + ASSERT_EQ(job2->status(), LoadStatus::CANCELED); + try + { + job1->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_TRUE(e.message().find(error_message) != String::npos); + } + try + { + job2->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + 
ASSERT_TRUE(e.message().find(error_message) != String::npos); + } +} + TEST(AsyncLoader, TestConcurrency) { AsyncLoaderTest t(10); From 51992ae96d7f3d165a2fd090d87817a9fd9daac1 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 17:27:47 +0000 Subject: [PATCH 039/689] add ScheduleJobWithCanceledDependencies test --- src/Common/tests/gtest_async_loader.cpp | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 5be715db2f6..e713e0be562 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -434,6 +434,46 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } } +TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) +{ + AsyncLoaderTest t; + + auto canceled_job_func = [&] (const LoadJobPtr &) {}; + auto canceled_job = makeLoadJob({}, "canceled_job", canceled_job_func); + auto canceled_task = t.loader.schedule({ canceled_job }); + canceled_task.remove(); + + t.loader.start(); + + auto job_func = [&] (const LoadJobPtr &) {}; + auto job1 = makeLoadJob({ canceled_job }, "job1", job_func); + auto job2 = makeLoadJob({ job1 }, "job2", job_func); + auto task = t.loader.schedule({ job1, job2 }); + + t.loader.wait(); + + ASSERT_EQ(job1->status(), LoadStatus::CANCELED); + ASSERT_EQ(job2->status(), LoadStatus::CANCELED); + try + { + job1->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + } + try + { + job2->wait(); + FAIL(); + } + catch (Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + } +} + TEST(AsyncLoader, TestConcurrency) { AsyncLoaderTest t(10); From d8bf775b9ace8e82973253c9ad4ee7ab4ae370ef Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 19:29:24 +0000 Subject: [PATCH 040/689] fix worker shutdown --- src/Common/AsyncLoader.h | 5 +---- src/Common/tests/gtest_async_loader.cpp | 2 -- 2 files changed, 1 insertion(+), 
6 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 84b709f14a1..98f2e4153d4 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -619,10 +619,7 @@ private: else if (job) finish(lock, job, LoadStatus::OK); - if (!is_running) - return; - - if (ready_queue.empty()) + if (!is_running || ready_queue.empty()) { workers--; return; diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index e713e0be562..8a1d9f8b2de 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -14,10 +14,8 @@ #include #include - using namespace DB; - namespace CurrentMetrics { extern const Metric TablesLoaderThreads; From 8c1f734f50edbbe56c1f6b8fa2fbc7c7bd0acc57 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 14 Apr 2023 20:49:56 +0000 Subject: [PATCH 041/689] add random test with dependent tasks --- src/Common/AsyncLoader.h | 6 +++ src/Common/tests/gtest_async_loader.cpp | 59 ++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 98f2e4153d4..6d19ed05463 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -449,6 +449,12 @@ public: return pool.getMaxThreads(); } + size_t getScheduledJobCount() const + { + std::unique_lock lock{mutex}; + return scheduled_jobs.size(); + } + private: void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock) { diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 8a1d9f8b2de..208d5590f3e 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -71,6 +71,25 @@ struct AsyncLoaderTest return {jobs.begin(), jobs.end()}; } + template + LoadJobSet randomJobSet(int job_count, int dep_probability_percent, std::vector external_deps, JobFunc job_func, std::string_view name_prefix = "job") + { + std::vector jobs; + for (int j = 0; j 
< job_count; j++) + { + LoadJobSet deps; + for (int d = 0; d < j; d++) + { + if (randomInt(0, 99) < dep_probability_percent) + deps.insert(jobs[d]); + } + if (randomInt(0, 99) < dep_probability_percent) + deps.insert(external_deps[randomInt(0, external_deps.size() - 1)]); + jobs.push_back(makeLoadJob(std::move(deps), fmt::format("{}{}", name_prefix, j), job_func)); + } + return {jobs.begin(), jobs.end()}; + } + template LoadJobSet chainJobSet(int job_count, JobFunc job_func, std::string_view name_prefix = "job") { @@ -526,7 +545,7 @@ TEST(AsyncLoader, TestOverload) } } -TEST(AsyncLoader, RandomTasks) +TEST(AsyncLoader, RandomIndependentTasks) { AsyncLoaderTest t(16); t.loader.start(); @@ -546,3 +565,41 @@ TEST(AsyncLoader, RandomTasks) t.randomSleepUs(100, 900, 20); // avg=100us } } + +TEST(AsyncLoader, RandomDependentTasks) +{ + AsyncLoaderTest t(16); + t.loader.start(); + + std::mutex mutex; + std::condition_variable cv; + std::vector tasks; + std::vector all_jobs; + + auto job_func = [&] (const LoadJobPtr & self) + { + for (const auto & dep : self->dependencies) + ASSERT_EQ(dep->status(), LoadStatus::OK); + cv.notify_one(); + }; + + std::unique_lock lock{mutex}; + + int tasks_left = 1000; + while (tasks_left-- > 0) + { + cv.wait(lock, [&] { return t.loader.getScheduledJobCount() < 100; }); + + // Add one new task + int job_count = t.randomInt(1, 32); + LoadJobSet jobs = t.randomJobSet(job_count, 5, all_jobs, job_func); + all_jobs.insert(all_jobs.end(), jobs.begin(), jobs.end()); + tasks.push_back(t.loader.schedule(std::move(jobs))); + + // Cancel random old task + if (tasks.size() > 100) + tasks.erase(tasks.begin() + t.randomInt(0, tasks.size() - 1)); + } + + t.loader.wait(); +} From b86c7374509895d986d2d1c6587c62c8df20a233 Mon Sep 17 00:00:00 2001 From: serxa Date: Sat, 15 Apr 2023 14:22:01 +0000 Subject: [PATCH 042/689] make performance-inefficient-vector-operation,-warnings-as-errors happy --- src/Common/tests/gtest_async_loader.cpp | 9 +++++++++ 1 file 
changed, 9 insertions(+) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 208d5590f3e..ad1cb206b94 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -58,6 +58,7 @@ struct AsyncLoaderTest LoadJobSet randomJobSet(int job_count, int dep_probability_percent, JobFunc job_func, std::string_view name_prefix = "job") { std::vector jobs; + jobs.reserve(job_count); for (int j = 0; j < job_count; j++) { LoadJobSet deps; @@ -75,6 +76,7 @@ struct AsyncLoaderTest LoadJobSet randomJobSet(int job_count, int dep_probability_percent, std::vector external_deps, JobFunc job_func, std::string_view name_prefix = "job") { std::vector jobs; + jobs.reserve(job_count); for (int j = 0; j < job_count; j++) { LoadJobSet deps; @@ -94,6 +96,7 @@ struct AsyncLoaderTest LoadJobSet chainJobSet(int job_count, JobFunc job_func, std::string_view name_prefix = "job") { std::vector jobs; + jobs.reserve(job_count); jobs.push_back(makeLoadJob({}, fmt::format("{}{}", name_prefix, 0), job_func)); for (int j = 1; j < job_count; j++) jobs.push_back(makeLoadJob({ jobs[j - 1] }, fmt::format("{}{}", name_prefix, j), job_func)); @@ -158,6 +161,7 @@ TEST(AsyncLoader, CycleDetection) try { std::vector jobs; + jobs.reserve(16); jobs.push_back(makeLoadJob({}, "job0", job_func)); jobs.push_back(makeLoadJob({ jobs[0] }, "job1", job_func)); jobs.push_back(makeLoadJob({ jobs[0], jobs[1] }, "job2", job_func)); @@ -344,6 +348,7 @@ TEST(AsyncLoader, CancelExecutingTask) // Make several iterations to catch the race (if any) for (int iteration = 0; iteration < 10; iteration++) { std::vector task1_jobs; + task1_jobs.reserve(256); auto blocker_job = makeLoadJob({}, "blocker_job", blocker_job_func); task1_jobs.push_back(blocker_job); for (int i = 0; i < 100; i++) @@ -510,6 +515,7 @@ TEST(AsyncLoader, TestConcurrency) }; std::vector tasks; + tasks.reserve(concurrency); for (int i = 0; i < concurrency; i++) 
tasks.push_back(t.loader.schedule(t.chainJobSet(5, job_func))); t.loader.wait(); @@ -537,6 +543,7 @@ TEST(AsyncLoader, TestOverload) t.loader.stop(); std::vector tasks; + tasks.reserve(concurrency); for (int i = 0; i < concurrency; i++) tasks.push_back(t.loader.schedule(t.chainJobSet(5, job_func))); t.loader.start(); @@ -558,6 +565,7 @@ TEST(AsyncLoader, RandomIndependentTasks) }; std::vector tasks; + tasks.reserve(512); for (int i = 0; i < 512; i++) { int job_count = t.randomInt(1, 32); @@ -586,6 +594,7 @@ TEST(AsyncLoader, RandomDependentTasks) std::unique_lock lock{mutex}; int tasks_left = 1000; + tasks.reserve(tasks_left); while (tasks_left-- > 0) { cv.wait(lock, [&] { return t.loader.getScheduledJobCount() < 100; }); From 264956fecea261805c902ab1ba16c9da604c6259 Mon Sep 17 00:00:00 2001 From: serxa Date: Sat, 15 Apr 2023 19:48:10 +0000 Subject: [PATCH 043/689] add test for static priorities --- src/Common/AsyncLoader.h | 26 +++++++++++------- src/Common/tests/gtest_async_loader.cpp | 35 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 6d19ed05463..5fff5ee89c1 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -144,23 +144,31 @@ private: // Key of a pending job in ready queue struct ReadyKey { - ssize_t priority; - UInt64 ready_seqno; + ssize_t priority; // Ascending order + ssize_t initial_priority; // Ascending order + UInt64 ready_seqno; // Descending order bool operator<(const ReadyKey & rhs) const { - if (priority == rhs.priority) - return ready_seqno < rhs.ready_seqno; - return priority > rhs.priority; + if (priority > rhs.priority) + return true; + if (priority < rhs.priority) + return false; + if (initial_priority > rhs.initial_priority) + return true; + if (initial_priority < rhs.initial_priority) + return false; + return ready_seqno < rhs.ready_seqno; } }; // Scheduling information for a pending job struct Info { - ssize_t priority = 
0; + ssize_t initial_priority = 0; // Initial priority passed into schedule() + ssize_t priority = 0; // Elevated priority, due to priority inheritance or prioritize() size_t dependencies_left = 0; - UInt64 ready_seqno = 0; // zero means that job is not in ready queue + UInt64 ready_seqno = 0; // Zero means that job is not in ready queue LoadJobSet dependent_jobs; // Three independent states of a non-finished jobs @@ -171,7 +179,7 @@ private: // Get key of a ready job ReadyKey key() const { - return {priority, ready_seqno}; + return {.priority = priority, .initial_priority = initial_priority, .ready_seqno = ready_seqno}; } }; @@ -311,7 +319,7 @@ public: { NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - scheduled_jobs.emplace(job, Info{.priority = priority}); + scheduled_jobs.emplace(job, Info{.initial_priority = priority, .priority = priority}); }); job->priority.store(priority); // Set user-facing priority } diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index ad1cb206b94..a72cc0f994b 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -552,6 +552,41 @@ TEST(AsyncLoader, TestOverload) } } +TEST(AsyncLoader, StaticPriorities) +{ + AsyncLoaderTest t(1); + + std::string schedule; + + auto job_func = [&] (const LoadJobPtr & self) + { + schedule += fmt::format("{}{}", self->name, self->priority); + }; + + std::vector jobs; + jobs.push_back(makeLoadJob({}, "A", job_func)); // 0 + jobs.push_back(makeLoadJob({ jobs[0] }, "B", job_func)); // 1 + jobs.push_back(makeLoadJob({ jobs[0] }, "C", job_func)); // 2 + jobs.push_back(makeLoadJob({ jobs[0] }, "D", job_func)); // 3 + jobs.push_back(makeLoadJob({ jobs[0] }, "E", job_func)); // 4 + jobs.push_back(makeLoadJob({ jobs[3], jobs[4] }, "F", job_func)); // 5 + jobs.push_back(makeLoadJob({ jobs[5] }, "G", job_func)); // 6 + jobs.push_back(makeLoadJob({ jobs[6] }, "H", job_func)); // 7 + auto task = t.loader.schedule({ jobs[0] }, 0); 
+ task.merge(t.loader.schedule({ jobs[1] }, 3)); + task.merge(t.loader.schedule({ jobs[2] }, 4)); + task.merge(t.loader.schedule({ jobs[3] }, 1)); + task.merge(t.loader.schedule({ jobs[4] }, 2)); + task.merge(t.loader.schedule({ jobs[5] }, 0)); + task.merge(t.loader.schedule({ jobs[6] }, 0)); + task.merge(t.loader.schedule({ jobs[7] }, 9)); + + t.loader.start(); + t.loader.wait(); + + ASSERT_EQ(schedule, "A9E9D9F9G9H9C4B3"); +} + TEST(AsyncLoader, RandomIndependentTasks) { AsyncLoaderTest t(16); From c736d9fd4fc4c79e6ea2f7c71ba0e8c7f437c878 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 08:35:59 +0000 Subject: [PATCH 044/689] fix test --- src/Common/tests/gtest_async_loader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index a72cc0f994b..755f856d340 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -73,7 +73,7 @@ struct AsyncLoaderTest } template - LoadJobSet randomJobSet(int job_count, int dep_probability_percent, std::vector external_deps, JobFunc job_func, std::string_view name_prefix = "job") + LoadJobSet randomJobSet(int job_count, int dep_probability_percent, const std::vector & external_deps, JobFunc job_func, std::string_view name_prefix = "job") { std::vector jobs; jobs.reserve(job_count); @@ -85,7 +85,7 @@ struct AsyncLoaderTest if (randomInt(0, 99) < dep_probability_percent) deps.insert(jobs[d]); } - if (randomInt(0, 99) < dep_probability_percent) + if (!external_deps.empty() && randomInt(0, 99) < dep_probability_percent) deps.insert(external_deps[randomInt(0, external_deps.size() - 1)]); jobs.push_back(makeLoadJob(std::move(deps), fmt::format("{}{}", name_prefix, j), job_func)); } From 69719c0819c79a470ef08aa2e686a12330e0f945 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 11:04:59 +0000 Subject: [PATCH 045/689] allow to change max number of workers in runtime --- 
src/Common/AsyncLoader.h | 22 ++++++++--- src/Common/tests/gtest_async_loader.cpp | 49 +++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 5fff5ee89c1..d639c1031cd 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -120,8 +120,6 @@ private: finished.notify_all(); } - // TODO(serxa): add callback/status for cancel? - std::function func; mutable std::mutex mutex; @@ -243,6 +241,7 @@ public: } // Do not track jobs in this task + // WARNING: Jobs will never be removed() and are going to be stored as finished_jobs until ~AsyncLoader() void detach() { loader = nullptr; @@ -451,10 +450,23 @@ public: finished_jobs.erase(job); } + void setMaxThreads(size_t value) + { + std::unique_lock lock{mutex}; + pool.setMaxThreads(value); + pool.setMaxFreeThreads(value); + pool.setQueueSize(value); + max_threads = value; + if (!is_running) + return; + for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) + spawn(lock); + } + size_t getMaxThreads() const { std::unique_lock lock{mutex}; - return pool.getMaxThreads(); + return max_threads; } size_t getScheduledJobCount() const @@ -600,7 +612,7 @@ private: ready_queue.emplace(info.key(), job); }); - if (is_running && workers < max_threads) // TODO(serxa): Can we make max_thread changeable in runtime? 
+ if (is_running && workers < max_threads) spawn(lock); } @@ -633,7 +645,7 @@ private: else if (job) finish(lock, job, LoadStatus::OK); - if (!is_running || ready_queue.empty()) + if (!is_running || ready_queue.empty() || workers > max_threads) { workers--; return; diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 755f856d340..52cc3436595 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -647,3 +647,52 @@ TEST(AsyncLoader, RandomDependentTasks) t.loader.wait(); } + +TEST(AsyncLoader, SetMaxThreads) +{ + AsyncLoaderTest t(1); + + std::atomic sync_index{0}; + std::atomic executing{0}; + int max_threads_values[] = {1, 2, 3, 4, 5, 4, 3, 2, 1, 5, 10, 5, 1, 20, 1}; + std::vector>> syncs; + syncs.reserve(std::size(max_threads_values)); + for (int max_threads : max_threads_values) + syncs.push_back(std::make_unique>(max_threads + 1)); + + + auto job_func = [&] (const LoadJobPtr &) + { + int idx = sync_index; + if (idx < syncs.size()) + { + executing++; + syncs[idx]->arrive_and_wait(); // (A) + executing--; + syncs[idx]->arrive_and_wait(); // (B) + } + }; + + // Generate enough independent jobs + for (int i = 0; i < 1000; i++) + t.loader.schedule({makeLoadJob({}, "job", job_func)}).detach(); + + t.loader.start(); + while (sync_index < syncs.size()) + { + // Wait for `max_threads` jobs to start executing + int idx = sync_index; + while (executing.load() != max_threads_values[idx]) + { + ASSERT_LE(executing, max_threads_values[idx]); + std::this_thread::yield(); + } + + // Allow all jobs to finish + syncs[idx]->arrive_and_wait(); // (A) + sync_index++; + if (sync_index < syncs.size()) + t.loader.setMaxThreads(max_threads_values[sync_index]); + syncs[idx]->arrive_and_wait(); // (B) this sync point is required to allow `executing` value to go back down to zero after we change number of workers + } +} From 800efd5df82491ed0e6decfe72cdff5670c5ac0e Mon Sep 17 00:00:00 2001 From: 
serxa Date: Sun, 16 Apr 2023 12:07:46 +0000 Subject: [PATCH 046/689] add a good comment --- src/Common/AsyncLoader.h | 42 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index d639c1031cd..9e54b3ae75f 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -135,7 +135,45 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && return std::make_shared(std::move(dependencies), name, std::forward(func)); } -// TODO(serxa): write good comment +// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks dependencies and priorities of jobs. +// Basic usage example: +// auto job_func = [&] (const LoadJobPtr & self) { +// LOG_TRACE(log, "Executing load job '{}' with priority '{}'", self->name, self->priority); +// }; +// auto job1 = makeLoadJob({}, "job1", job_func); +// auto job2 = makeLoadJob({ job1 }, "job2", job_func); +// auto job3 = makeLoadJob({ job1 }, "job3", job_func); +// auto task = async_loader.schedule({ job1, job2, job3 }, /* priority = */ 0); +// Here we have created and scheduled a task consisting of two jobs. Job1 has no dependencies and is run first. +// Job2 and job3 depends on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it: +// async_loader->prioritize(job3, 1); // higher priority jobs are run first +// job3->wait(); // blocks until job completion or cancelation and rethrow an exception (if any) +// +// AsyncLoader tracks state of all scheduled jobs. Job lifecycle is the following: +// 1) Job is constructed with PENDING status. +// 2) Job is scheduled and placed into a task. Scheduled job may be ready (i.e. have all its dependencies finished) or blocked. +// 3a) When all dependencies are successfully executed, job became ready. Ready job is enqueued into the ready queue. 
+// 3b) If at least one of job dependencies is failed or canceled, then this job is canceled (with all its dependent jobs as well).
+// On cancelation an ASYNC_LOAD_CANCELED exception is generated and saved inside LoadJob object. Job status is changed to CANCELED.
+// Exception is rethrown by any existing or new `wait()` call. Job is moved to the set of finished jobs.
+// 4) Scheduled pending ready job starts execution by a worker. Job is dequeued. Callback `job_func` is called.
+// Status of an executing job is PENDING. And it is still considered as scheduled job by AsyncLoader.
+// Note that `job_func` of a CANCELED job is never executed.
+// 5a) On successful execution job status is changed to OK and all existing and new `wait()` calls finish w/o exceptions.
+// 5b) Any exception thrown out of `job_func` is wrapped into ASYNC_LOAD_FAILED exception and saved inside LoadJob.
+// Job status is changed to FAILED. All dependent jobs are canceled. The exception is rethrown from all existing and new `wait()` calls.
+// 6) Job is no longer considered as scheduled and is instead moved to finished jobs set. This is required for introspection of finished jobs.
+// 7) Task object containing this job is destructed or `remove()` is explicitly called. Job is removed from the finished job set.
+// 8) Job is destructed.
+//
+// Every job has a priority associated with it. AsyncLoader runs higher priority (greater `priority` value) jobs first. Job priority can be elevated
+// (a) if either it has a dependent job with higher priority (in this case priority of a dependent job is inherited);
+// (b) or job was explicitly prioritized by `prioritize(job, higher_priority)` call (this also leads to a priority inheritance for all the dependencies).
+// Note that to avoid priority inversion `job_func` should use `self->priority` to schedule new jobs in AsyncLoader or any other pool.
+// Value stored in load job priority field is atomic and can be increased even during job execution.
+// +// When task is scheduled. It can contain dependencies on previously scheduled jobs. These jobs can have any status. +// The only forbidden thing is a dependency on job, that was not scheduled in AsyncLoader yet: all dependent jobs are immediately canceled. class AsyncLoader : private boost::noncopyable { private: @@ -685,8 +723,6 @@ private: mutable std::mutex mutex; bool is_running = false; - // TODO(serxa): add metrics for number of jobs in every state - // Full set of scheduled pending jobs along with scheduling info std::unordered_map scheduled_jobs; From 1a3780ed8e6fa0b0ce0a701059bc54e7d2b03d5f Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 12:13:23 +0000 Subject: [PATCH 047/689] fix --- src/Common/AsyncLoader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 9e54b3ae75f..aca35b599f4 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -172,7 +172,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && // Note that to avoid priority inversion `job_func` should use `self->priority` to schedule new jobs in AsyncLoader or any other pool. // Value stored in load job priority field is atomic and can be increased even during job execution. // -// When task is scheduled. It can contain dependencies on previously scheduled jobs. These jobs can have any status. +// When task is scheduled it can contain dependencies on previously scheduled jobs. These jobs can have any status. // The only forbidden thing is a dependency on job, that was not scheduled in AsyncLoader yet: all dependent jobs are immediately canceled. 
 class AsyncLoader : private boost::noncopyable
 {

From 281b72c4131015a7e43340ac2ae2d78b327ba46e Mon Sep 17 00:00:00 2001
From: Sergei Trifonov
Date: Sun, 16 Apr 2023 15:09:44 +0200
Subject: [PATCH 048/689] Update src/Common/AsyncLoader.h

---
 src/Common/AsyncLoader.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h
index aca35b599f4..1423b7f83c7 100644
--- a/src/Common/AsyncLoader.h
+++ b/src/Common/AsyncLoader.h
@@ -154,7 +154,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func &&
 // 2) Job is scheduled and placed into a task. Scheduled job may be ready (i.e. have all its dependencies finished) or blocked.
 // 3a) When all dependencies are successfully executed, job became ready. Ready job is enqueued into the ready queue.
 // 3b) If at least one of job dependencies is failed or canceled, then this job is canceled (with all its dependent jobs as well).
-// On cancelation an ASYNC_LOAD_CANCELED exception is generated and saved inside LoadJob object. Job status is changed to CANCELED.
+// On cancellation an ASYNC_LOAD_CANCELED exception is generated and saved inside LoadJob object. Job status is changed to CANCELED.
 // Exception is rethrown by any existing or new `wait()` call. Job is moved to the set of finished jobs.
 // 4) Scheduled pending ready job starts execution by a worker. Job is dequeued. Callback `job_func` is called.
 // Status of an executing job is PENDING. And it is still considered as scheduled job by AsyncLoader.
From 1efacba196e282254c1e68e08f74783bfdb6f92c Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Sun, 16 Apr 2023 15:09:55 +0200 Subject: [PATCH 049/689] Update src/Common/AsyncLoader.h --- src/Common/AsyncLoader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 1423b7f83c7..34de4fe3c8b 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -147,7 +147,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && // Here we have created and scheduled a task consisting of two jobs. Job1 has no dependencies and is run first. // Job2 and job3 depends on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it: // async_loader->prioritize(job3, 1); // higher priority jobs are run first -// job3->wait(); // blocks until job completion or cancelation and rethrow an exception (if any) +// job3->wait(); // blocks until job completion or cancellation and rethrow an exception (if any) // // AsyncLoader tracks state of all scheduled jobs. Job lifecycle is the following: // 1) Job is constructed with PENDING status. 
From 356b93483591df41ab9fa2dfd82b98a91c26bd27 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 18:17:11 +0000 Subject: [PATCH 050/689] split AsyncLoader into .h and .cpp files + more docs --- src/Common/AsyncLoader.cpp | 560 +++++++++++++++++++++ src/Common/AsyncLoader.h | 632 +++--------------------- src/Common/tests/gtest_async_loader.cpp | 4 +- 3 files changed, 641 insertions(+), 555 deletions(-) create mode 100644 src/Common/AsyncLoader.cpp diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp new file mode 100644 index 00000000000..09dd4f129ae --- /dev/null +++ b/src/Common/AsyncLoader.cpp @@ -0,0 +1,560 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ASYNC_LOAD_SCHEDULE_FAILED; + extern const int ASYNC_LOAD_FAILED; + extern const int ASYNC_LOAD_CANCELED; +} + + +LoadStatus LoadJob::status() const +{ + std::unique_lock lock{mutex}; + return load_status; +} + +std::exception_ptr LoadJob::exception() const +{ + std::unique_lock lock{mutex}; + return load_exception; +} + +ssize_t LoadJob::priority() const +{ + return load_priority; +} + +void LoadJob::wait() const +{ + std::unique_lock lock{mutex}; + waiters++; + finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; }); + waiters--; + if (load_exception) + std::rethrow_exception(load_exception); +} + +void LoadJob::waitNoThrow() const noexcept +{ + std::unique_lock lock{mutex}; + waiters++; + finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; }); + waiters--; +} + +size_t LoadJob::waiters_count() const +{ + std::unique_lock lock{mutex}; + return waiters; +} + +void LoadJob::ok() +{ + std::unique_lock lock{mutex}; + load_status = LoadStatus::OK; + if (waiters > 0) + finished.notify_all(); +} + +void LoadJob::failed(const std::exception_ptr & ptr) +{ + std::unique_lock lock{mutex}; + load_status = LoadStatus::FAILED; + load_exception = ptr; + if (waiters > 0) + 
finished.notify_all(); +} + +void LoadJob::canceled(const std::exception_ptr & ptr) +{ + std::unique_lock lock{mutex}; + load_status = LoadStatus::CANCELED; + load_exception = ptr; + if (waiters > 0) + finished.notify_all(); +} + + +AsyncLoader::Task::Task() + : loader(nullptr) +{} + +AsyncLoader::Task::Task(AsyncLoader * loader_, LoadJobSet && jobs_) + : loader(loader_) + , jobs(std::move(jobs_)) +{} + +AsyncLoader::Task::Task(AsyncLoader::Task && o) noexcept + : loader(std::exchange(o.loader, nullptr)) + , jobs(std::move(o.jobs)) +{} + +AsyncLoader::Task::~Task() +{ + remove(); +} + +AsyncLoader::Task & AsyncLoader::Task::operator=(AsyncLoader::Task && o) noexcept +{ + loader = std::exchange(o.loader, nullptr); + jobs = std::move(o.jobs); + return *this; +} + +void AsyncLoader::Task::merge(AsyncLoader::Task && o) +{ + if (!loader) + { + *this = std::move(o); + } + else + { + chassert(loader == o.loader); + jobs.merge(o.jobs); + o.loader = nullptr; + } +} + +void AsyncLoader::Task::remove() +{ + if (loader) + { + loader->remove(jobs); + detach(); + } +} + +void AsyncLoader::Task::detach() +{ + loader = nullptr; + jobs.clear(); +} + +AsyncLoader::AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_) + : log_failures(log_failures_) + , max_threads(max_threads_) + , pool(metric_threads, metric_active_threads, max_threads) +{} + +AsyncLoader::~AsyncLoader() +{ + stop(); +} + +void AsyncLoader::start() +{ + std::unique_lock lock{mutex}; + is_running = true; + for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) + spawn(lock); +} + +void AsyncLoader::wait() +{ + pool.wait(); +} + +void AsyncLoader::stop() +{ + { + std::unique_lock lock{mutex}; + is_running = false; + // NOTE: there is no need to notify because workers never wait + } + pool.wait(); +} + +AsyncLoader::Task AsyncLoader::schedule(LoadJobSet && jobs, ssize_t priority) +{ + std::unique_lock lock{mutex}; + + // Sanity checks + for (const 
auto & job : jobs) + { + if (job->status() != LoadStatus::PENDING) + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Trying to schedule already finished load job '{}'", job->name); + if (scheduled_jobs.contains(job)) + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job '{}' has been already scheduled", job->name); + } + + // Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones. + checkCycle(jobs, lock); + + // We do not want any exception to be throws after this point, because the following code is not exception-safe + DENY_ALLOCATIONS_IN_SCOPE; + + // Schedule all incoming jobs + for (const auto & job : jobs) + { + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + scheduled_jobs.emplace(job, Info{.initial_priority = priority, .priority = priority}); + }); + job->load_priority.store(priority); // Set user-facing priority + } + + // Process dependencies on scheduled pending jobs + for (const auto & job : jobs) + { + Info & info = scheduled_jobs.find(job)->second; + for (const auto & dep : job->dependencies) + { + // Register every dependency on scheduled job with back-link to dependent job + if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) + { + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + dep_info->second.dependent_jobs.insert(job); + }); + info.dependencies_left++; + + // Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion + prioritize(dep, priority, lock); + } + } + + // Enqueue non-blocked jobs (w/o dependencies) to ready queue + if (!info.is_blocked()) + enqueue(info, job, lock); + } + + // Process dependencies on other jobs. It is done in a separate pass to facilitate propagation of cancel signals (if any). 
+ for (const auto & job : jobs) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + for (const auto & dep : job->dependencies) + { + if (scheduled_jobs.contains(dep)) + continue; // Skip dependencies on scheduled pending jobs (already processed) + LoadStatus dep_status = dep->status(); + if (dep_status == LoadStatus::OK) + continue; // Dependency on already successfully finished job -- it's okay. + + if (dep_status == LoadStatus::PENDING) + { + // Dependency on not scheduled pending job -- it's bad. + // Probably, there is an error in `jobs` set: not all jobs were passed to `schedule()` call. + // We are not going to run any dependent job, so cancel them all. + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> Load job '{}': not scheduled pending load job (it must be also scheduled), all dependent load jobs are canceled", + job->name, + dep->name)); + }); + finish(lock, job, LoadStatus::CANCELED, e); + break; // This job is now finished, stop its dependencies processing + } + if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED) + { + // Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs. 
+ std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> {}", + job->name, + getExceptionMessage(dep->exception(), /* with_stack_trace = */ false))); + }); + finish(lock, job, LoadStatus::CANCELED, e); + break; // This job is now finished, stop its dependencies processing + } + } + } + else + { + // Job was already canceled on previous iteration of this cycle -- skip + } + } + + return Task(this, std::move(jobs)); +} + +void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority) +{ + DENY_ALLOCATIONS_IN_SCOPE; + std::unique_lock lock{mutex}; + prioritize(job, new_priority, lock); +} + +void AsyncLoader::remove(const LoadJobSet & jobs) +{ + DENY_ALLOCATIONS_IN_SCOPE; + std::unique_lock lock{mutex}; + // On the first pass: + // - cancel all not executing jobs to avoid races + // - do not wait executing jobs (otherwise, on unlock a worker could start executing a dependent job, that should be canceled) + for (const auto & job : jobs) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + if (info->second.is_executing()) + continue; // Skip executing jobs on the first pass + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name)); + }); + finish(lock, job, LoadStatus::CANCELED, e); + } + } + // On the second pass wait for executing jobs to finish + for (const auto & job : jobs) + { + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + // Job is currently executing + chassert(info->second.is_executing()); + lock.unlock(); + job->waitNoThrow(); // Wait for job to finish + lock.lock(); + } + } + // On the third pass all jobs are finished - remove them all + // It is better to do it under one lock to avoid exposing intermediate states + for (const auto & job : jobs) + 
finished_jobs.erase(job); +} + +void AsyncLoader::setMaxThreads(size_t value) +{ + std::unique_lock lock{mutex}; + pool.setMaxThreads(value); + pool.setMaxFreeThreads(value); + pool.setQueueSize(value); + max_threads = value; + if (!is_running) + return; + for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) + spawn(lock); +} + +size_t AsyncLoader::getMaxThreads() const +{ + std::unique_lock lock{mutex}; + return max_threads; +} + +size_t AsyncLoader::getScheduledJobCount() const +{ + std::unique_lock lock{mutex}; + return scheduled_jobs.size(); +} + +void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock & lock) +{ + LoadJobSet left = jobs; + LoadJobSet visited; + visited.reserve(left.size()); + while (!left.empty()) + { + LoadJobPtr job = *left.begin(); + checkCycleImpl(job, left, visited, lock); + } +} + +String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock & lock) +{ + if (!left.contains(job)) + return {}; // Do not consider external dependencies and already processed jobs + if (auto [_, inserted] = visited.insert(job); !inserted) + { + visited.erase(job); // Mark where cycle ends + return job->name; + } + for (const auto & dep : job->dependencies) + { + if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty()) + { + if (!visited.contains(job)) // Check for cycle end + throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job dependency cycle detected: {} -> {}", job->name, chain); + else + return fmt::format("{} -> {}", job->name, chain); // chain is not a cycle yet -- continue building + } + } + left.erase(job); + return {}; +} + +void AsyncLoader::finish(std::unique_lock & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job) +{ + if (status == LoadStatus::OK) + { + // Notify waiters + job->ok(); + + // Update dependent jobs and enqueue if ready + chassert(scheduled_jobs.contains(job)); // Job was 
pending + for (const auto & dep : scheduled_jobs[job].dependent_jobs) + { + chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending + Info & dep_info = scheduled_jobs[dep]; + dep_info.dependencies_left--; + if (!dep_info.is_blocked()) + enqueue(dep_info, dep, lock); + } + } + else + { + // Notify waiters + if (status == LoadStatus::FAILED) + job->failed(exception_from_job); + else if (status == LoadStatus::CANCELED) + job->canceled(exception_from_job); + + chassert(scheduled_jobs.contains(job)); // Job was pending + Info & info = scheduled_jobs[job]; + if (info.is_ready()) + { + ready_queue.erase(info.key()); + info.ready_seqno = 0; + } + + // Recurse into all dependent jobs + LoadJobSet dependent; + dependent.swap(info.dependent_jobs); // To avoid container modification during recursion + for (const auto & dep : dependent) + { + std::exception_ptr e; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + e = std::make_exception_ptr( + Exception(ErrorCodes::ASYNC_LOAD_CANCELED, + "Load job '{}' -> {}", + dep->name, + getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); + }); + finish(lock, dep, LoadStatus::CANCELED, e); + } + + // Clean dependency graph edges + for (const auto & dep : job->dependencies) + if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) + dep_info->second.dependent_jobs.erase(job); + } + + // Job became finished + scheduled_jobs.erase(job); + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + finished_jobs.insert(job); + }); +} + +void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock & lock) +{ + if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) + { + if (info->second.priority >= new_priority) + return; // Never lower priority + + // Update priority and push job forward through ready queue if needed + if (info->second.ready_seqno) + ready_queue.erase(info->second.key()); + info->second.priority = new_priority; + 
job->load_priority.store(new_priority); // Set user-facing priority (may affect executing jobs) + if (info->second.ready_seqno) + { + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + ready_queue.emplace(info->second.key(), job); + }); + } + + // Recurse into dependencies + for (const auto & dep : job->dependencies) + prioritize(dep, new_priority, lock); + } +} + +void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock) +{ + chassert(!info.is_blocked()); + chassert(info.ready_seqno == 0); + info.ready_seqno = ++last_ready_seqno; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + ready_queue.emplace(info.key(), job); + }); + + if (is_running && workers < max_threads) + spawn(lock); +} + +void AsyncLoader::spawn(std::unique_lock &) +{ + workers++; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + pool.scheduleOrThrowOnError([this] { worker(); }); + }); +} + +void AsyncLoader::worker() +{ + DENY_ALLOCATIONS_IN_SCOPE; + + LoadJobPtr job; + std::exception_ptr exception_from_job; + while (true) + { + // This is inside the loop to also reset previous thread names set inside the jobs + setThreadName("AsyncLoader"); + + { + std::unique_lock lock{mutex}; + + // Handle just executed job + if (exception_from_job) + finish(lock, job, LoadStatus::FAILED, exception_from_job); + else if (job) + finish(lock, job, LoadStatus::OK); + + if (!is_running || ready_queue.empty() || workers > max_threads) + { + workers--; + return; + } + + // Take next job to be executed from the ready queue + auto it = ready_queue.begin(); + job = it->second; + ready_queue.erase(it); + scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue + } + + ALLOW_ALLOCATIONS_IN_SCOPE; + + try + { + job->func(job); + exception_from_job = {}; + } + catch (...) 
+ { + NOEXCEPT_SCOPE({ + if (log_failures) + tryLogCurrentException(__PRETTY_FUNCTION__); + exception_from_job = std::make_exception_ptr( + Exception(ErrorCodes::ASYNC_LOAD_FAILED, + "Load job '{}' failed: {}", + job->name, + getCurrentExceptionMessage(/* with_stacktrace = */ true))); + }); + } + } +} + +} diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 34de4fe3c8b..bf35b5de736 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -8,14 +8,9 @@ #include #include #include -#include #include #include #include -#include -#include -#include -#include namespace DB { @@ -26,21 +21,18 @@ using LoadJobSet = std::unordered_set; class LoadTask; class AsyncLoader; -namespace ErrorCodes -{ - extern const int ASYNC_LOAD_SCHEDULE_FAILED; - extern const int ASYNC_LOAD_FAILED; - extern const int ASYNC_LOAD_CANCELED; -} - +// Execution status of a load job. enum class LoadStatus { - PENDING, // Load job is not started yet - OK, // Load job executed and was successful - FAILED, // Load job executed and failed - CANCELED // Load job is not going to be executed due to removal or dependency failure + PENDING, // Load job is not started yet. + OK, // Load job executed and was successful. + FAILED, // Load job executed and failed. + CANCELED // Load job is not going to be executed due to removal or dependency failure. }; +// Smallest indivisible part of a loading process. Load job can have multiple dependencies, thus jobs constitute a direct acyclic graph (DAG). +// Job encapsulates a function to be executed by `AsyncLoader` as soon as job functions of all dependencies are successfully executed. +// Job can be waited for by an arbitrary number of threads. See `AsyncLoader` class description for more details. class LoadJob : private boost::noncopyable { public: @@ -51,76 +43,35 @@ public: , func(std::forward(func_)) {} - LoadStatus status() const - { - std::unique_lock lock{mutex}; - return load_status; - } + // Current job status. 
+    LoadStatus status() const;
+    std::exception_ptr exception() const;
 
-    std::exception_ptr exception() const
-    {
-        std::unique_lock lock{mutex};
-        return load_exception;
-    }
+    // Returns current value of a priority of the job. May differ from initial priority passed to `AsyncLoader::schedule()` call.
+    ssize_t priority() const;
 
-    void wait() const
-    {
-        std::unique_lock lock{mutex};
-        waiters++;
-        finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
-        waiters--;
-        if (load_exception)
-            std::rethrow_exception(load_exception);
-    }
+    // Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
+    // Throws if job is FAILED or CANCELED. Returns or throws immediately on non-pending job.
+    void wait() const;
 
-    void waitNoThrow() const
-    {
-        std::unique_lock lock{mutex};
-        waiters++;
-        finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
-        waiters--;
-    }
+    // Wait for a job to reach any non PENDING status.
+    void waitNoThrow() const noexcept;
 
-    size_t waiters_count() const
-    {
-        std::unique_lock lock{mutex};
-        return waiters;
-    }
+    // Returns number of threads blocked by `wait()` or `waitNoThrow()` calls.
+ size_t waiters_count() const; - const LoadJobSet dependencies; // jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard + const LoadJobSet dependencies; // Jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard const String name; - std::atomic priority{0}; private: friend class AsyncLoader; - void ok() - { - std::unique_lock lock{mutex}; - load_status = LoadStatus::OK; - if (waiters > 0) - finished.notify_all(); - } - - void failed(const std::exception_ptr & ptr) - { - std::unique_lock lock{mutex}; - load_status = LoadStatus::FAILED; - load_exception = ptr; - if (waiters > 0) - finished.notify_all(); - } - - void canceled(const std::exception_ptr & ptr) - { - std::unique_lock lock{mutex}; - load_status = LoadStatus::CANCELED; - load_exception = ptr; - if (waiters > 0) - finished.notify_all(); - } + void ok(); + void failed(const std::exception_ptr & ptr); + void canceled(const std::exception_ptr & ptr); std::function func; + std::atomic load_priority{0}; mutable std::mutex mutex; mutable std::condition_variable finished; @@ -177,7 +128,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && class AsyncLoader : private boost::noncopyable { private: - // Key of a pending job in ready queue + // Key of a pending job in the ready queue. struct ReadyKey { ssize_t priority; // Ascending order @@ -198,14 +149,14 @@ private: } }; - // Scheduling information for a pending job + // Scheduling information for a pending job. struct Info { - ssize_t initial_priority = 0; // Initial priority passed into schedule() - ssize_t priority = 0; // Elevated priority, due to priority inheritance or prioritize() - size_t dependencies_left = 0; - UInt64 ready_seqno = 0; // Zero means that job is not in ready queue - LoadJobSet dependent_jobs; + ssize_t initial_priority = 0; // Initial priority passed into schedule(). 
+ ssize_t priority = 0; // Elevated priority, due to priority inheritance or prioritize(). + size_t dependencies_left = 0; // Current number of dependencies on pending jobs. + UInt64 ready_seqno = 0; // Zero means that job is not in ready queue. + LoadJobSet dependent_jobs; // Set of jobs dependent on this job. // Three independent states of a non-finished jobs bool is_blocked() const { return dependencies_left > 0; } @@ -222,521 +173,96 @@ private: public: using Metric = CurrentMetrics::Metric; - // Helper class that removes all not started jobs in destructor and wait all executing jobs to finish + // Helper class that removes all not started and finished jobs in destructor and waits for all the executing jobs to finish. class Task { public: - Task() - : loader(nullptr) - {} - - Task(AsyncLoader * loader_, LoadJobSet && jobs_) - : loader(loader_) - , jobs(std::move(jobs_)) - {} - + Task(); + Task(AsyncLoader * loader_, LoadJobSet && jobs_); Task(const Task & o) = delete; + Task(Task && o) noexcept; Task & operator=(const Task & o) = delete; + ~Task(); + Task & operator=(Task && o) noexcept; - Task(Task && o) noexcept - : loader(std::exchange(o.loader, nullptr)) - , jobs(std::move(o.jobs)) - {} + // Merge all jobs from other task into this task. Useful for merging jobs with different priorities into one task. + void merge(Task && o); - Task & operator=(Task && o) noexcept - { - loader = std::exchange(o.loader, nullptr); - jobs = std::move(o.jobs); - return *this; - } + // Remove all jobs from AsyncLoader. 
+ void remove(); - void merge(Task && o) - { - if (!loader) - { - *this = std::move(o); - } - else - { - chassert(loader == o.loader); - jobs.merge(o.jobs); - o.loader = nullptr; - } - } - - ~Task() - { - remove(); - } - - void remove() - { - if (loader) - { - loader->remove(jobs); - detach(); - } - } - - // Do not track jobs in this task - // WARNING: Jobs will never be removed() and are going to be stored as finished_jobs until ~AsyncLoader() - void detach() - { - loader = nullptr; - jobs.clear(); - } + // Do not track jobs in this task. + // WARNING: Jobs will never be removed() and are going to be stored as finished_jobs until ~AsyncLoader(). + void detach(); private: AsyncLoader * loader; LoadJobSet jobs; }; - AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_) - : log_failures(log_failures_) - , max_threads(max_threads_) - , pool(metric_threads, metric_active_threads, max_threads) - {} + AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_); - // WARNING: all Task instances returned by `schedule()` should be destructed before AsyncLoader - ~AsyncLoader() - { - stop(); - } + // WARNING: all Task instances returned by `schedule()` should be destructed before AsyncLoader. + ~AsyncLoader(); - // Start workers to execute scheduled load jobs - void start() - { - std::unique_lock lock{mutex}; - is_running = true; - for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) - spawn(lock); - } + // Start workers to execute scheduled load jobs. + void start(); // Wait for all load jobs to finish, including all new jobs. So at first take care to stop adding new jobs. - void wait() - { - pool.wait(); - } + void wait(); // Wait for currently executing jobs to finish, but do not run any other pending jobs. // Not finished jobs are left in pending state: // - they can be resumed by calling start() again; // - or canceled using ~Task() or remove() later. 
- void stop() - { - { - std::unique_lock lock{mutex}; - is_running = false; - // NOTE: there is no need to notify because workers never wait - } - pool.wait(); - } + void stop(); - [[nodiscard]] Task schedule(LoadJobSet && jobs, ssize_t priority = 0) - { - std::unique_lock lock{mutex}; - - // Sanity checks - for (const auto & job : jobs) - { - if (job->status() != LoadStatus::PENDING) - throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Trying to schedule already finished load job '{}'", job->name); - if (scheduled_jobs.contains(job)) - throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job '{}' has been already scheduled", job->name); - } - - // Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones. - checkCycle(jobs, lock); - - // We do not want any exception to be throws after this point, because the following code is not exception-safe - DENY_ALLOCATIONS_IN_SCOPE; - - // Schedule all incoming jobs - for (const auto & job : jobs) - { - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - scheduled_jobs.emplace(job, Info{.initial_priority = priority, .priority = priority}); - }); - job->priority.store(priority); // Set user-facing priority - } - - // Process dependencies on scheduled pending jobs - for (const auto & job : jobs) - { - Info & info = scheduled_jobs.find(job)->second; - for (const auto & dep : job->dependencies) - { - // Register every dependency on scheduled job with back-link to dependent job - if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) - { - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - dep_info->second.dependent_jobs.insert(job); - }); - info.dependencies_left++; - - // Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion - prioritize(dep, priority, lock); - } - } - - // Enqueue non-blocked jobs (w/o dependencies) to ready queue - if (!info.is_blocked()) - 
enqueue(info, job, lock); - } - - // Process dependencies on other jobs. It is done in a separate pass to facilitate propagation of cancel signals (if any). - for (const auto & job : jobs) - { - if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) - { - for (const auto & dep : job->dependencies) - { - if (scheduled_jobs.contains(dep)) - continue; // Skip dependencies on scheduled pending jobs (already processed) - LoadStatus dep_status = dep->status(); - if (dep_status == LoadStatus::OK) - continue; // Dependency on already successfully finished job -- it's okay. - - if (dep_status == LoadStatus::PENDING) - { - // Dependency on not scheduled pending job -- it's bad. - // Probably, there is an error in `jobs` set: not all jobs were passed to `schedule()` call. - // We are not going to run any dependent job, so cancel them all. - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' -> Load job '{}': not scheduled pending load job (it must be also scheduled), all dependent load jobs are canceled", - job->name, - dep->name)); - }); - finish(lock, job, LoadStatus::CANCELED, e); - break; // This job is now finished, stop its dependencies processing - } - if (dep_status == LoadStatus::FAILED || dep_status == LoadStatus::CANCELED) - { - // Dependency on already failed or canceled job -- it's okay. Cancel all dependent jobs. 
- std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' -> {}", - job->name, - getExceptionMessage(dep->exception(), /* with_stack_trace = */ false))); - }); - finish(lock, job, LoadStatus::CANCELED, e); - break; // This job is now finished, stop its dependencies processing - } - } - } - else - { - // Job was already canceled on previous iteration of this cycle -- skip - } - } - - return Task(this, std::move(jobs)); - } + // Schedule all `jobs` with given `priority` and return task containing these jobs. + // Higher priority jobs (with greater `priority` value) are executed earlier. + // All dependencies of a scheduled job inherit its priority if it is higher. This way higher priority job + // never wait for (blocked by) lower priority jobs (no priority inversion). + [[nodiscard]] Task schedule(LoadJobSet && jobs, ssize_t priority = 0); // Increase priority of a job and all its dependencies recursively - void prioritize(const LoadJobPtr & job, ssize_t new_priority) - { - DENY_ALLOCATIONS_IN_SCOPE; - std::unique_lock lock{mutex}; - prioritize(job, new_priority, lock); - } + void prioritize(const LoadJobPtr & job, ssize_t new_priority); - // Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them - void remove(const LoadJobSet & jobs) - { - DENY_ALLOCATIONS_IN_SCOPE; - std::unique_lock lock{mutex}; - // On the first pass: - // - cancel all not executing jobs to avoid races - // - do not wait executing jobs (otherwise, on unlock a worker could start executing a dependent job, that should be canceled) - for (const auto & job : jobs) - { - if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) - { - if (info->second.is_executing()) - continue; // Skip executing jobs on the first pass - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = 
std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name)); - }); - finish(lock, job, LoadStatus::CANCELED, e); - } - } - // On the second pass wait for executing jobs to finish - for (const auto & job : jobs) - { - if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) - { - // Job is currently executing - chassert(info->second.is_executing()); - lock.unlock(); - job->waitNoThrow(); // Wait for job to finish - lock.lock(); - } - } - // On the third pass all jobs are finished - remove them all - // It is better to do it under one lock to avoid exposing intermediate states - for (const auto & job : jobs) - finished_jobs.erase(job); - } + // Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them. + void remove(const LoadJobSet & jobs); - void setMaxThreads(size_t value) - { - std::unique_lock lock{mutex}; - pool.setMaxThreads(value); - pool.setMaxFreeThreads(value); - pool.setQueueSize(value); - max_threads = value; - if (!is_running) - return; - for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++) - spawn(lock); - } + // Increase or decrease maximum number of simultaneously executing jobs. 
+ void setMaxThreads(size_t value); - size_t getMaxThreads() const - { - std::unique_lock lock{mutex}; - return max_threads; - } - - size_t getScheduledJobCount() const - { - std::unique_lock lock{mutex}; - return scheduled_jobs.size(); - } + size_t getMaxThreads() const; + size_t getScheduledJobCount() const; private: - void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock) - { - LoadJobSet left = jobs; - LoadJobSet visited; - visited.reserve(left.size()); - while (!left.empty()) - { - LoadJobPtr job = *left.begin(); - checkCycleImpl(job, left, visited, lock); - } - } + void checkCycle(const LoadJobSet & jobs, std::unique_lock & lock); + String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock & lock); + void finish(std::unique_lock & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job = {}); + void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock & lock); + void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock); + void spawn(std::unique_lock &); + void worker(); - String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock & lock) - { - if (!left.contains(job)) - return {}; // Do not consider external dependencies and already processed jobs - if (auto [_, inserted] = visited.insert(job); !inserted) - { - visited.erase(job); // Mark where cycle ends - return job->name; - } - for (const auto & dep : job->dependencies) - { - if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty()) - { - if (!visited.contains(job)) // Check for cycle end - throw Exception(ErrorCodes::ASYNC_LOAD_SCHEDULE_FAILED, "Load job dependency cycle detected: {} -> {}", job->name, chain); - else - return fmt::format("{} -> {}", job->name, chain); // chain is not a cycle yet -- continue building - } - } - left.erase(job); - return {}; - } + const bool log_failures; // Worker should log all exceptions 
caught from job functions. - void finish(std::unique_lock & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job = {}) - { - if (status == LoadStatus::OK) - { - // Notify waiters - job->ok(); - - // Update dependent jobs and enqueue if ready - chassert(scheduled_jobs.contains(job)); // Job was pending - for (const auto & dep : scheduled_jobs[job].dependent_jobs) - { - chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending - Info & dep_info = scheduled_jobs[dep]; - dep_info.dependencies_left--; - if (!dep_info.is_blocked()) - enqueue(dep_info, dep, lock); - } - } - else - { - // Notify waiters - if (status == LoadStatus::FAILED) - job->failed(exception_from_job); - else if (status == LoadStatus::CANCELED) - job->canceled(exception_from_job); - - chassert(scheduled_jobs.contains(job)); // Job was pending - Info & info = scheduled_jobs[job]; - if (info.is_ready()) - { - ready_queue.erase(info.key()); - info.ready_seqno = 0; - } - - // Recurse into all dependent jobs - LoadJobSet dependent; - dependent.swap(info.dependent_jobs); // To avoid container modification during recursion - for (const auto & dep : dependent) - { - std::exception_ptr e; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - e = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_CANCELED, - "Load job '{}' -> {}", - dep->name, - getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); - }); - finish(lock, dep, LoadStatus::CANCELED, e); - } - - // Clean dependency graph edges - for (const auto & dep : job->dependencies) - if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) - dep_info->second.dependent_jobs.erase(job); - } - - // Job became finished - scheduled_jobs.erase(job); - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - finished_jobs.insert(job); - }); - } - - void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock & lock) - { - if (auto info = 
scheduled_jobs.find(job); info != scheduled_jobs.end()) - { - if (info->second.priority >= new_priority) - return; // Never lower priority - - // Update priority and push job forward through ready queue if needed - if (info->second.ready_seqno) - ready_queue.erase(info->second.key()); - info->second.priority = new_priority; - job->priority.store(new_priority); // Set user-facing priority (may affect executing jobs) - if (info->second.ready_seqno) - { - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - ready_queue.emplace(info->second.key(), job); - }); - } - - // Recurse into dependencies - for (const auto & dep : job->dependencies) - prioritize(dep, new_priority, lock); - } - } - - void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock & lock) - { - chassert(!info.is_blocked()); - chassert(info.ready_seqno == 0); - info.ready_seqno = ++last_ready_seqno; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - ready_queue.emplace(info.key(), job); - }); - - if (is_running && workers < max_threads) - spawn(lock); - } - - void spawn(std::unique_lock &) - { - workers++; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - pool.scheduleOrThrowOnError([this] { worker(); }); - }); - } - - void worker() - { - DENY_ALLOCATIONS_IN_SCOPE; - - LoadJobPtr job; - std::exception_ptr exception_from_job; - while (true) - { - /// This is inside the loop to also reset previous thread names set inside the jobs - setThreadName("AsyncLoader"); - - { - std::unique_lock lock{mutex}; - - // Handle just executed job - if (exception_from_job) - finish(lock, job, LoadStatus::FAILED, exception_from_job); - else if (job) - finish(lock, job, LoadStatus::OK); - - if (!is_running || ready_queue.empty() || workers > max_threads) - { - workers--; - return; - } - - // Take next job to be executed from the ready queue - auto it = ready_queue.begin(); - job = it->second; - ready_queue.erase(it); - scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue - } - - 
ALLOW_ALLOCATIONS_IN_SCOPE; - - try - { - job->func(job); - exception_from_job = {}; - } - catch (...) - { - NOEXCEPT_SCOPE({ - if (log_failures) - tryLogCurrentException(__PRETTY_FUNCTION__); - exception_from_job = std::make_exception_ptr( - Exception(ErrorCodes::ASYNC_LOAD_FAILED, - "Load job '{}' failed: {}", - job->name, - getCurrentExceptionMessage(/* with_stacktrace = */ true))); - }); - } - } - } - - const bool log_failures; - - mutable std::mutex mutex; + mutable std::mutex mutex; // Guards all the fields below. bool is_running = false; - // Full set of scheduled pending jobs along with scheduling info + // Full set of scheduled pending jobs along with scheduling info. std::unordered_map scheduled_jobs; - // Subset of scheduled pending jobs with resolved dependencies (waiting for a thread to be executed) - // Represent a queue of jobs in order of decreasing priority and FIFO for jobs with equal priorities + // Subset of scheduled pending non-blocked jobs (waiting for a worker to be executed). + // Represent a queue of jobs in order of decreasing priority and FIFO for jobs with equal priorities. std::map ready_queue; - // Set of finished jobs (for introspection only, until job is removed) + // Set of finished jobs (for introspection only, until jobs are removed). LoadJobSet finished_jobs; - // Increasing counter for ReadyKey assignment (to preserve FIFO order of jobs with equal priority) + // Increasing counter for `ReadyKey` assignment (to preserve FIFO order of the jobs with equal priorities). UInt64 last_ready_seqno = 0; - // For executing jobs. Note that we avoid using an internal queue of the pool to be able to prioritize jobs + // For executing jobs. Note that we avoid using an internal queue of the pool to be able to prioritize jobs. 
size_t max_threads; size_t workers = 0; ThreadPool pool; diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 52cc3436595..e6baa87c182 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -115,7 +115,7 @@ TEST(AsyncLoader, Smoke) auto job_func = [&] (const LoadJobPtr & self) { jobs_done++; - if (self->priority == low_priority) + if (self->priority() == low_priority) low_priority_jobs_done++; }; @@ -560,7 +560,7 @@ TEST(AsyncLoader, StaticPriorities) auto job_func = [&] (const LoadJobPtr & self) { - schedule += fmt::format("{}{}", self->name, self->priority); + schedule += fmt::format("{}{}", self->name, self->priority()); }; std::vector jobs; From 0d7e3d410075560c22dd53a2d20b39d8f602428b Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 18:18:50 +0000 Subject: [PATCH 051/689] fix comment --- src/Common/AsyncLoader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index bf35b5de736..4b05dd40bf1 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -89,7 +89,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && // `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks dependencies and priorities of jobs. // Basic usage example: // auto job_func = [&] (const LoadJobPtr & self) { -// LOG_TRACE(log, "Executing load job '{}' with priority '{}'", self->name, self->priority); +// LOG_TRACE(log, "Executing load job '{}' with priority '{}'", self->name, self->priority()); // }; // auto job1 = makeLoadJob({}, "job1", job_func); // auto job2 = makeLoadJob({ job1 }, "job2", job_func); @@ -120,7 +120,7 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && // Every job has a priority associated with it. AsyncLoader runs higher priority (greater `priority` value) jobs first. 
Job priority can be elevated // (a) if either it has a dependent job with higher priority (in this case priority of a dependent job is inherited); // (b) or job was explicitly prioritized by `prioritize(job, higher_priority)` call (this also leads to a priority inheritance for all the dependencies). -// Note that to avoid priority inversion `job_func` should use `self->priority` to schedule new jobs in AsyncLoader or any other pool. +// Note that to avoid priority inversion `job_func` should use `self->priority()` to schedule new jobs in AsyncLoader or any other pool. // Value stored in load job priority field is atomic and can be increased even during job execution. // // When task is scheduled it can contain dependencies on previously scheduled jobs. These jobs can have any status. From 911c46aa3298d7ee32fd745e56261924ec67cf11 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 18:40:24 +0000 Subject: [PATCH 052/689] fix double finish() during cancellation of a job with multiple paths --- src/Common/AsyncLoader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 09dd4f129ae..8dbfe27e5cc 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -427,6 +427,8 @@ void AsyncLoader::finish(std::unique_lock & lock, const LoadJobPtr & dependent.swap(info.dependent_jobs); // To avoid container modification during recursion for (const auto & dep : dependent) { + if (!scheduled_jobs.contains(dep)) + continue; // Job has already been canceled std::exception_ptr e; NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; @@ -439,7 +441,7 @@ void AsyncLoader::finish(std::unique_lock & lock, const LoadJobPtr & finish(lock, dep, LoadStatus::CANCELED, e); } - // Clean dependency graph edges + // Clean dependency graph edges pointing to canceled jobs for (const auto & dep : job->dependencies) if (auto dep_info = scheduled_jobs.find(dep); dep_info != scheduled_jobs.end()) 
dep_info->second.dependent_jobs.erase(job); From 54c5b1083bf2aa123cb16f122f570faed62be6c5 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 19:28:14 +0000 Subject: [PATCH 053/689] add test for dynamic prioritization --- src/Common/AsyncLoader.cpp | 2 + src/Common/tests/gtest_async_loader.cpp | 56 +++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 8dbfe27e5cc..0734741f469 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -284,6 +284,8 @@ AsyncLoader::Task AsyncLoader::schedule(LoadJobSet && jobs, ssize_t priority) void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority) { + if (!job) + return; DENY_ALLOCATIONS_IN_SCOPE; std::unique_lock lock{mutex}; prioritize(job, new_priority, lock); diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index e6baa87c182..21a0ba1f41e 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -587,6 +587,62 @@ TEST(AsyncLoader, StaticPriorities) ASSERT_EQ(schedule, "A9E9D9F9G9H9C4B3"); } +TEST(AsyncLoader, DynamicPriorities) +{ + AsyncLoaderTest t(1); + + for (bool prioritize : {false, true}) + { + std::string schedule; + + LoadJobPtr job_to_prioritize; + + auto job_func = [&] (const LoadJobPtr & self) + { + if (prioritize && self->name == "C") + t.loader.prioritize(job_to_prioritize, 9); // dynamic prioritization + schedule += fmt::format("{}{}", self->name, self->priority()); + }; + + // Job DAG with initial priorities. During execution of C4, job G0 priority is increased to G9, postponing B3 job executing. + // A0 -+-> B3 + // | + // `-> C4 + // | + // `-> D1 -. 
+ // | +-> F0 --> G0 --> H0 + // `-> E2 -' + std::vector jobs; + jobs.push_back(makeLoadJob({}, "A", job_func)); // 0 + jobs.push_back(makeLoadJob({ jobs[0] }, "B", job_func)); // 1 + jobs.push_back(makeLoadJob({ jobs[0] }, "C", job_func)); // 2 + jobs.push_back(makeLoadJob({ jobs[0] }, "D", job_func)); // 3 + jobs.push_back(makeLoadJob({ jobs[0] }, "E", job_func)); // 4 + jobs.push_back(makeLoadJob({ jobs[3], jobs[4] }, "F", job_func)); // 5 + jobs.push_back(makeLoadJob({ jobs[5] }, "G", job_func)); // 6 + jobs.push_back(makeLoadJob({ jobs[6] }, "H", job_func)); // 7 + auto task = t.loader.schedule({ jobs[0] }, 0); + task.merge(t.loader.schedule({ jobs[1] }, 3)); + task.merge(t.loader.schedule({ jobs[2] }, 4)); + task.merge(t.loader.schedule({ jobs[3] }, 1)); + task.merge(t.loader.schedule({ jobs[4] }, 2)); + task.merge(t.loader.schedule({ jobs[5] }, 0)); + task.merge(t.loader.schedule({ jobs[6] }, 0)); + task.merge(t.loader.schedule({ jobs[7] }, 0)); + + job_to_prioritize = jobs[6]; + + t.loader.start(); + t.loader.wait(); + t.loader.stop(); + + if (prioritize) + ASSERT_EQ(schedule, "A4C4E9D9F9G9B3H0"); + else + ASSERT_EQ(schedule, "A4C4B3E2D1F0G0H0"); + } +} + TEST(AsyncLoader, RandomIndependentTasks) { AsyncLoaderTest t(16); From 326c733be6908c415ca1094f36ce6982d2e34af9 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 16 Apr 2023 21:15:44 +0000 Subject: [PATCH 054/689] fix tidy build --- src/Common/AsyncLoader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 0734741f469..8da41a03d3d 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -266,7 +266,7 @@ AsyncLoader::Task AsyncLoader::schedule(LoadJobSet && jobs, ssize_t priority) e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' -> {}", job->name, - getExceptionMessage(dep->exception(), /* with_stack_trace = */ false))); + getExceptionMessage(dep->exception(), /* 
with_stacktrace = */ false))); }); finish(lock, job, LoadStatus::CANCELED, e); break; // This job is now finished, stop its dependencies processing @@ -438,7 +438,7 @@ void AsyncLoader::finish(std::unique_lock & lock, const LoadJobPtr & Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' -> {}", dep->name, - getExceptionMessage(exception_from_job, /* with_stack_trace = */ false))); + getExceptionMessage(exception_from_job, /* with_stacktrace = */ false))); }); finish(lock, dep, LoadStatus::CANCELED, e); } From baced2c66ecb37f645d2ef89740dd11e525aa4dd Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 17 Apr 2023 09:17:51 +0000 Subject: [PATCH 055/689] fix data race in test --- src/Common/tests/gtest_async_loader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 21a0ba1f41e..d6039162994 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -751,4 +751,5 @@ TEST(AsyncLoader, SetMaxThreads) t.loader.setMaxThreads(max_threads_values[sync_index]); syncs[idx]->arrive_and_wait(); // (B) this sync point is required to allow `executing` value to go back down to zero after we change number of workers } + t.loader.wait(); } From d4daea5f0906d559657f102cf6332fd3e3982bf2 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 17 Apr 2023 09:47:16 +0000 Subject: [PATCH 056/689] fix typos --- src/Common/AsyncLoader.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 4b05dd40bf1..383d4b5ef24 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -95,9 +95,9 @@ LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && // auto job2 = makeLoadJob({ job1 }, "job2", job_func); // auto job3 = makeLoadJob({ job1 }, "job3", job_func); // auto task = async_loader.schedule({ job1, job2, job3 }, /* priority = */ 0); -// Here we have created 
and scheduled a task consisting of two jobs. Job1 has no dependencies and is run first. -// Job2 and job3 depends on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it: -// async_loader->prioritize(job3, 1); // higher priority jobs are run first +// Here we have created and scheduled a task consisting of three jobs. Job1 has no dependencies and is run first. +// Job2 and job3 depend on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it: +// async_loader->prioritize(job3, /* priority = */ 1); // higher priority jobs are run first // job3->wait(); // blocks until job completion or cancellation and rethrow an exception (if any) // // AsyncLoader tracks state of all scheduled jobs. Job lifecycle is the following: From 8cc9b32af1a80dd25f2c4f480949090a0d454967 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 17 Apr 2023 10:03:54 +0000 Subject: [PATCH 057/689] fix more comments --- src/Common/AsyncLoader.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 383d4b5ef24..d3de883f757 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -158,7 +158,7 @@ private: UInt64 ready_seqno = 0; // Zero means that job is not in ready queue. LoadJobSet dependent_jobs; // Set of jobs dependent on this job. - // Three independent states of a non-finished jobs + // Three independent states of a non-finished job. bool is_blocked() const { return dependencies_left > 0; } bool is_ready() const { return dependencies_left == 0 && ready_seqno > 0; } bool is_executing() const { return dependencies_left == 0 && ready_seqno == 0; } @@ -188,11 +188,11 @@ public: // Merge all jobs from other task into this task. Useful for merging jobs with different priorities into one task. void merge(Task && o); - // Remove all jobs from AsyncLoader. + // Remove all jobs of this task from AsyncLoader. 
void remove(); // Do not track jobs in this task. - // WARNING: Jobs will never be removed() and are going to be stored as finished_jobs until ~AsyncLoader(). + // WARNING: Jobs will never be removed() and are going to be stored as finished jobs until ~AsyncLoader(). void detach(); private: @@ -213,17 +213,19 @@ public: // Wait for currently executing jobs to finish, but do not run any other pending jobs. // Not finished jobs are left in pending state: - // - they can be resumed by calling start() again; + // - they can be executed by calling start() again; // - or canceled using ~Task() or remove() later. void stop(); - // Schedule all `jobs` with given `priority` and return task containing these jobs. + // Schedule all `jobs` with given `priority` and return a task containing these jobs. // Higher priority jobs (with greater `priority` value) are executed earlier. // All dependencies of a scheduled job inherit its priority if it is higher. This way higher priority job // never wait for (blocked by) lower priority jobs (no priority inversion). + // Returned task destructor ensures that all the `jobs` are finished (OK, FAILED or CANCELED) + // and are removed from AsyncLoader, so it is thread-safe to destroy them. [[nodiscard]] Task schedule(LoadJobSet && jobs, ssize_t priority = 0); - // Increase priority of a job and all its dependencies recursively + // Increase priority of a job and all its dependencies recursively. void prioritize(const LoadJobPtr & job, ssize_t new_priority); // Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them. 
From c672d117ea78c48d036dcf40b7c3c5774644fb74 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 17 Apr 2023 10:11:59 +0000 Subject: [PATCH 058/689] better job shutdown --- src/Common/AsyncLoader.cpp | 12 ++++++++---- src/Common/AsyncLoader.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 8da41a03d3d..d382100a203 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -62,8 +62,7 @@ void LoadJob::ok() { std::unique_lock lock{mutex}; load_status = LoadStatus::OK; - if (waiters > 0) - finished.notify_all(); + finish(); } void LoadJob::failed(const std::exception_ptr & ptr) @@ -71,8 +70,7 @@ void LoadJob::failed(const std::exception_ptr & ptr) std::unique_lock lock{mutex}; load_status = LoadStatus::FAILED; load_exception = ptr; - if (waiters > 0) - finished.notify_all(); + finish(); } void LoadJob::canceled(const std::exception_ptr & ptr) @@ -80,6 +78,12 @@ void LoadJob::canceled(const std::exception_ptr & ptr) std::unique_lock lock{mutex}; load_status = LoadStatus::CANCELED; load_exception = ptr; + finish(); +} + +void LoadJob::finish() +{ + func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `task->wait()` return if (waiters > 0) finished.notify_all(); } diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index d3de883f757..2e2c605cfe6 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -69,6 +69,7 @@ private: void ok(); void failed(const std::exception_ptr & ptr); void canceled(const std::exception_ptr & ptr); + void finish(); std::function func; std::atomic load_priority{0}; From ff7444f0772eb201dcb4f591a6578eb745ba5aff Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 17 Apr 2023 10:16:01 +0000 Subject: [PATCH 059/689] typo --- src/Common/AsyncLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 
d382100a203..7fe88ce7212 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -83,7 +83,7 @@ void LoadJob::canceled(const std::exception_ptr & ptr) void LoadJob::finish() { - func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `task->wait()` return + func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `LoadJob::wait()` return if (waiters > 0) finished.notify_all(); } From 31e3a3bc0c1824c0c1b65b33596ee967b6bc9c6b Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 19 Apr 2023 13:35:58 +0200 Subject: [PATCH 060/689] Update src/Common/AsyncLoader.h Co-authored-by: Antonio Andelic --- src/Common/AsyncLoader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 2e2c605cfe6..5f87601e753 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -37,9 +37,9 @@ class LoadJob : private boost::noncopyable { public: template - LoadJob(LoadJobSet && dependencies_, const String & name_, Func && func_) + LoadJob(LoadJobSet && dependencies_, String name_, Func && func_) : dependencies(std::move(dependencies_)) - , name(name_) + , name(std::move(name_)) , func(std::forward(func_)) {} From 2e31140ca4e2740d6b9b1dd3ed2a585ab1b2ec92 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 19 Apr 2023 11:46:56 +0000 Subject: [PATCH 061/689] review fixes --- src/Common/AsyncLoader.cpp | 2 +- src/Common/AsyncLoader.h | 2 +- src/Common/tests/gtest_async_loader.cpp | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 7fe88ce7212..1d428b3601f 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -52,7 +52,7 @@ void LoadJob::waitNoThrow() const noexcept waiters--; } -size_t LoadJob::waiters_count() const +size_t LoadJob::waitersCount() const { std::unique_lock lock{mutex}; return waiters; diff --git 
a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 2e2c605cfe6..26a192d56c8 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -58,7 +58,7 @@ public: void waitNoThrow() const noexcept; // Returns number of threads blocked by `wait()` or `waitNoThrow()` calls. - size_t waiters_count() const; + size_t waitersCount() const; const LoadJobSet dependencies; // Jobs to be done before this one (with ownership), it is `const` to make creation of cycles hard const String name; diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index d6039162994..c05dbb751ca 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -313,7 +314,7 @@ TEST(AsyncLoader, CancelExecutingJob) { task.remove(); // waits for (C) }); - while (job->waiters_count() == 0) + while (job->waitersCount() == 0) std::this_thread::yield(); ASSERT_EQ(job->status(), LoadStatus::PENDING); sync.arrive_and_wait(); // (B) sync with job @@ -362,7 +363,7 @@ TEST(AsyncLoader, CancelExecutingTask) { task1.remove(); // waits for (C) }); - while (blocker_job->waiters_count() == 0) + while (blocker_job->waitersCount() == 0) std::this_thread::yield(); ASSERT_EQ(blocker_job->status(), LoadStatus::PENDING); sync.arrive_and_wait(); // (B) sync with job @@ -384,10 +385,10 @@ TEST(AsyncLoader, JobFailure) AsyncLoaderTest t; t.loader.start(); - std::string_view error_message = "test job failure"; + std::string error_message = "test job failure"; auto job_func = [&] (const LoadJobPtr &) { - throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); + throw std::runtime_error(error_message); }; auto job = makeLoadJob({}, "job", job_func); From 4b51f12479b28e584735d747756eea79ba4b9471 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 19 Apr 2023 13:48:38 +0200 Subject: [PATCH 062/689] Update src/Common/AsyncLoader.h 
Co-authored-by: Antonio Andelic --- src/Common/AsyncLoader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index e393fbde7d6..6f49b9b2d72 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -82,9 +82,9 @@ private: }; template -LoadJobPtr makeLoadJob(LoadJobSet && dependencies, const String & name, Func && func) +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func) { - return std::make_shared(std::move(dependencies), name, std::forward(func)); + return std::make_shared(std::move(dependencies), std::move(name), std::forward(func)); } // `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks dependencies and priorities of jobs. From 966902fdfda744bfb386cd555c21ae168af02d53 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 16 Feb 2023 16:25:07 +0800 Subject: [PATCH 063/689] extend firt_value/last_value to accept null --- .../AggregateFunctionAny.cpp | 14 +- .../AggregateFunctionFactory.cpp | 2 +- .../AggregateFunctionMinMaxAny.h | 165 +++++++++++++++--- src/AggregateFunctions/HelpersMinMaxAny.h | 48 ++++- 4 files changed, 199 insertions(+), 30 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index 9bc6e6af14f..219d530c58d 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -14,11 +14,21 @@ AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } +AggregateFunctionPtr createAggregateFunctionNullableAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue(name, argument_types, parameters, settings)); +} + AggregateFunctionPtr 
createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } +AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue(name, argument_types, parameters, settings)); +} + AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); @@ -36,10 +46,10 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) // Synonyms for use as window functions. factory.registerFunction("first_value", - { createAggregateFunctionAny, properties }, + { createAggregateFunctionNullableAny, properties }, AggregateFunctionFactory::CaseInsensitive); factory.registerFunction("last_value", - { createAggregateFunctionAnyLast, properties }, + { createAggregateFunctionNullableAnyLast, properties }, AggregateFunctionFactory::CaseInsensitive); } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 6cacf66500f..dc8b410dd9a 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -106,7 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get( // nullability themselves. Another special case is functions from Nothing // that are rewritten to AggregateFunctionNothing, in this case // nested_function is nullptr. 
- if (!nested_function || !nested_function->isOnlyWindowFunction()) + if (!nested_function->getResultType()->isNullable() && (!nested_function || !nested_function->isOnlyWindowFunction())) return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters); } diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index b984772c8ea..76cdd5690a5 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -766,19 +766,24 @@ static_assert( /// For any other value types. +template struct SingleValueDataGeneric { private: using Self = SingleValueDataGeneric; Field value; + bool has_value = false; public: - static constexpr bool is_nullable = false; + static constexpr bool is_nullable = IS_NULLABLE; static constexpr bool is_any = false; + static constexpr bool is_null_greater = IS_NULL_GREATER; bool has() const { + if constexpr (is_nullable) + return has_value; return !value.isNull(); } @@ -813,11 +818,15 @@ public: void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); + if constexpr (is_nullable) + has_value = true; } void change(const Self & to, Arena *) { value = to.value; + if constexpr (is_nullable) + has_value = true; } bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena) @@ -833,7 +842,7 @@ public: bool changeFirstTime(const Self & to, Arena * arena) { - if (!has() && to.has()) + if (!has() && (is_nullable || to.has())) { change(to, arena); return true; @@ -868,27 +877,86 @@ public: } else { - Field new_value; - column.get(row_num, new_value); - if (new_value < value) + if constexpr (is_nullable) { - value = new_value; - return true; + Field new_value; + column.get(row_num, new_value); + if constexpr (!is_null_greater) + { + if (!value.isNull() && (new_value.isNull() || new_value < value)) + { + value = new_value; + return 
true; + } + else + return false; + } + else + { + if ((value.isNull() && !new_value.isNull()) || (!new_value.isNull() && new_value < value)) + { + value = new_value; + return true; + } + + return false; + } } else - return false; + { + Field new_value; + column.get(row_num, new_value); + if (new_value < value) + { + value = new_value; + return true; + } + else + return false; + } } } bool changeIfLess(const Self & to, Arena * arena) { - if (to.has() && (!has() || to.value < value)) + if (!to.has()) + return false; + if constexpr (is_nullable) { - change(to, arena); - return true; + if (!has()) + { + change(to, arena); + return true; + } + if constexpr (!is_null_greater) + { + if (to.value.isNull() || (!value.isNull() && to.value < value)) + { + value = to.value; + return true; + } + return false; + } + else + { + if ((value.isNull() && !to.value.isNull()) || (!to.value.isNull() || to.value < value)) + { + value = to.value; + return true; + } + return false; + } } else - return false; + { + if (!has() || to.value < value) + { + change(to, arena); + return true; + } + else + return false; + } } bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena) @@ -900,27 +968,80 @@ public: } else { - Field new_value; - column.get(row_num, new_value); - if (new_value > value) + if constexpr (is_nullable) { - value = new_value; - return true; + Field new_value; + column.get(row_num, new_value); + if constexpr (is_null_greater) + { + if (!value.isNull() && (new_value.isNull() || value < new_value)) + { + value = new_value; + return true; + } + return false; + } + else + { + if ((value.isNull() && !new_value.isNull()) || (!new_value.isNull() && value < new_value)) + { + value = new_value; + return true; + } + return false; + } } else - return false; + { + Field new_value; + column.get(row_num, new_value); + if (new_value > value) + { + value = new_value; + return true; + } + else + return false; + } } } bool changeIfGreater(const Self & to, Arena * arena) { - if 
(to.has() && (!has() || to.value > value)) + if (!to.has()) + return false; + if constexpr (is_nullable) { - change(to, arena); - return true; + if constexpr (is_null_greater) + { + if (!value.isNull() && (to.value.isNull() || value < to.value)) + { + value = to.value; + return true; + } + return false; + } + else + { + if ((value.isNull() && !to.value.isNull()) || (!to.value.isNull() && value < to.value)) + { + value = to.value; + return true; + } + return false; + + } } else - return false; + { + if (!has() || to.value > value) + { + change(to, arena); + return true; + } + else + return false; + } } bool isEqualTo(const IColumn & column, size_t row_num) const diff --git a/src/AggregateFunctions/HelpersMinMaxAny.h b/src/AggregateFunctions/HelpersMinMaxAny.h index 026a206b109..f8fac616d2c 100644 --- a/src/AggregateFunctions/HelpersMinMaxAny.h +++ b/src/AggregateFunctions/HelpersMinMaxAny.h @@ -9,9 +9,12 @@ #include #include - namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} struct Settings; /// min, max, any, anyLast, anyHeavy, etc... @@ -22,7 +25,6 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na assertUnary(name, argument_types); const DataTypePtr & argument_type = argument_types[0]; - WhichDataType which(argument_type); #define DISPATCH(TYPE) \ if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate>>(argument_type); /// NOLINT @@ -46,7 +48,43 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na if (which.idx == TypeIndex::String) return new AggregateFunctionTemplate>(argument_type); - return new AggregateFunctionTemplate>(argument_type); + return new AggregateFunctionTemplate>>(argument_type); +} + +template