This commit is contained in:
Nikita Taranov 2024-11-25 19:45:59 +01:00
parent 42df6591aa
commit db375779bd
3 changed files with 24 additions and 18 deletions

View File

@ -2468,7 +2468,7 @@ Possible values:
- default - default
This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`) This is the equivalent of `hash`, `parallel_hash` or `direct`, if possible (same as `direct,parallel_hash,hash`)
- grace_hash - grace_hash
@ -2482,11 +2482,13 @@ Possible values:
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- parallel_hash - parallel_hash
A variation of `hash` join that splits the data into buckets and concurrently builds several hash tables instead of one to speed up this process. A variation of `hash` join that splits the data into buckets and concurrently builds several hash tables instead of one to speed up this process.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. When using the `parallel_hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- partial_merge - partial_merge

View File

@ -7,7 +7,9 @@
#include <Core/Block.h> #include <Core/Block.h>
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Core/Joins.h>
#include <Core/Settings.h> #include <Core/Settings.h>
#include <Common/logger_useful.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
@ -28,7 +30,6 @@
#include <Storages/StorageDictionary.h> #include <Storages/StorageDictionary.h>
#include <Storages/StorageJoin.h> #include <Storages/StorageJoin.h>
#include <Common/logger_useful.h>
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include <type_traits> #include <type_traits>
@ -997,8 +998,8 @@ void TableJoin::resetToCross()
bool TableJoin::allowParallelHashJoin() const bool TableJoin::allowParallelHashJoin() const
{ {
if (std::ranges::find(join_algorithm, JoinAlgorithm::DEFAULT) == join_algorithm.end() if (std::ranges::none_of(
&& std::ranges::find(join_algorithm, JoinAlgorithm::PARALLEL_HASH) == join_algorithm.end()) join_algorithm, [](auto algo) { return algo == JoinAlgorithm::DEFAULT || algo == JoinAlgorithm::PARALLEL_HASH; }))
return false; return false;
if (!right_storage_name.empty()) if (!right_storage_name.empty())
return false; return false;

View File

@ -1,25 +1,26 @@
#pragma once #pragma once
#include <Core/Joins.h>
#include <Core/Names.h> #include <Core/Names.h>
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/JoinUtils.h>
#include <QueryPipeline/SizeLimits.h>
#include <DataTypes/getLeastSupertype.h> #include <DataTypes/getLeastSupertype.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/IKeyValueEntity.h> #include <Interpreters/IKeyValueEntity.h>
#include <Interpreters/JoinUtils.h>
#include <Interpreters/TemporaryDataOnDisk.h> #include <Interpreters/TemporaryDataOnDisk.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/Exception.h>
#include <Parsers/IAST_fwd.h> #include <Parsers/IAST_fwd.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/Exception.h>
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <memory>
#include <base/types.h> #include <base/types.h>
#include <algorithm>
#include <cstddef>
#include <memory>
#include <unordered_map>
#include <utility>
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric TemporaryFilesForJoin; extern const Metric TemporaryFilesForJoin;
@ -285,10 +286,12 @@ public:
bool isEnabledAlgorithm(JoinAlgorithm val) const bool isEnabledAlgorithm(JoinAlgorithm val) const
{ {
/// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm. /// When join_algorithm = 'default' (not specified by user) we use [parallel_]hash or direct algorithm.
/// This is the behaviour that was originally supported by ClickHouse. /// This is the behaviour that was originally supported by ClickHouse.
bool is_default_enabled = std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::DEFAULT) != join_algorithm.end(); bool is_default_enabled = std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::DEFAULT) != join_algorithm.end();
if (is_default_enabled && (val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT)) constexpr auto default_algorithms = std::array<JoinAlgorithm, 4>{
JoinAlgorithm::DEFAULT, JoinAlgorithm::HASH, JoinAlgorithm::PARALLEL_HASH, JoinAlgorithm::DIRECT};
if (is_default_enabled && std::ranges::find(default_algorithms, val) != default_algorithms.end())
return true; return true;
return std::find(join_algorithm.begin(), join_algorithm.end(), val) != join_algorithm.end(); return std::find(join_algorithm.begin(), join_algorithm.end(), val) != join_algorithm.end();
} }