From db375779bde676a2dfa425a7b64d56a420e50832 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 25 Nov 2024 19:45:59 +0100 Subject: [PATCH] Enable parallel_hash for the default join_algorithm --- src/Core/Settings.cpp | 6 ++++-- src/Interpreters/TableJoin.cpp | 7 ++++--- src/Interpreters/TableJoin.h | 29 ++++++++++++++++------------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index f5fde1b572b..4ea7986dd7f 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -2468,7 +2468,7 @@ Possible values: - default - This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`) + This is the equivalent of `hash`, `parallel_hash` or `direct`, if possible (same as `direct,parallel_hash,hash`) - grace_hash @@ -2482,11 +2482,13 @@ Possible values: [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. + When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. + - parallel_hash A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process. - When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. + When using the `parallel_hash` algorithm, the right part of `JOIN` is uploaded into RAM. 
- partial_merge diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index f03b8d44356..4c0ff66afd6 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -7,7 +7,9 @@ #include #include +#include #include +#include #include #include @@ -28,7 +30,6 @@ #include #include -#include #include #include #include @@ -997,8 +998,8 @@ void TableJoin::resetToCross() bool TableJoin::allowParallelHashJoin() const { - if (std::ranges::find(join_algorithm, JoinAlgorithm::DEFAULT) == join_algorithm.end() - && std::ranges::find(join_algorithm, JoinAlgorithm::PARALLEL_HASH) == join_algorithm.end()) + if (std::ranges::none_of( + join_algorithm, [](auto algo) { return algo == JoinAlgorithm::DEFAULT || algo == JoinAlgorithm::PARALLEL_HASH; })) return false; if (!right_storage_name.empty()) return false; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 146230f44be..7f3dd0d54cd 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -1,25 +1,26 @@ #pragma once +#include #include #include -#include -#include -#include -#include #include +#include #include +#include #include - -#include +#include #include +#include +#include -#include -#include - -#include -#include #include +#include +#include +#include +#include +#include + namespace CurrentMetrics { extern const Metric TemporaryFilesForJoin; @@ -285,10 +286,12 @@ public: bool isEnabledAlgorithm(JoinAlgorithm val) const { - /// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm. + /// When join_algorithm = 'default' (not specified by user) we use [parallel_]hash or direct algorithm. /// It's behaviour that was initially supported by clickhouse. 
bool is_default_enabled = std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::DEFAULT) != join_algorithm.end(); - if (is_default_enabled && (val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT)) + constexpr auto default_algorithms = std::array{ + JoinAlgorithm::DEFAULT, JoinAlgorithm::HASH, JoinAlgorithm::PARALLEL_HASH, JoinAlgorithm::DIRECT}; + if (is_default_enabled && std::ranges::find(default_algorithms, val) != default_algorithms.end()) return true; return std::find(join_algorithm.begin(), join_algorithm.end(), val) != join_algorithm.end(); }