This commit is contained in:
Nikita Taranov 2024-11-25 19:45:59 +01:00
parent 42df6591aa
commit db375779bd
3 changed files with 24 additions and 18 deletions

View File

@ -2468,7 +2468,7 @@ Possible values:
- default
This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`)
This is the equivalent of `hash`, `parallel_hash` or `direct`, if possible (same as `direct,parallel_hash,hash`)
- grace_hash
@ -2482,11 +2482,13 @@ Possible values:
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- parallel_hash
A variation of `hash` join that splits the data into buckets and concurrently builds several hash tables instead of one, which speeds up the build process.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
When using the `parallel_hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- partial_merge

View File

@ -7,7 +7,9 @@
#include <Core/Block.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/Joins.h>
#include <Core/Settings.h>
#include <Common/logger_useful.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
@ -28,7 +30,6 @@
#include <Storages/StorageDictionary.h>
#include <Storages/StorageJoin.h>
#include <Common/logger_useful.h>
#include <algorithm>
#include <string>
#include <type_traits>
@ -997,8 +998,8 @@ void TableJoin::resetToCross()
bool TableJoin::allowParallelHashJoin() const
{
if (std::ranges::find(join_algorithm, JoinAlgorithm::DEFAULT) == join_algorithm.end()
&& std::ranges::find(join_algorithm, JoinAlgorithm::PARALLEL_HASH) == join_algorithm.end())
if (std::ranges::none_of(
join_algorithm, [](auto algo) { return algo == JoinAlgorithm::DEFAULT || algo == JoinAlgorithm::PARALLEL_HASH; }))
return false;
if (!right_storage_name.empty())
return false;

View File

@ -1,25 +1,26 @@
#pragma once
#include <Core/Joins.h>
#include <Core/Names.h>
#include <Core/NamesAndTypes.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/JoinUtils.h>
#include <QueryPipeline/SizeLimits.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/IKeyValueEntity.h>
#include <Interpreters/JoinUtils.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Common/Exception.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/IAST_fwd.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/Exception.h>
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <memory>
#include <base/types.h>
#include <algorithm>
#include <cstddef>
#include <memory>
#include <unordered_map>
#include <utility>
namespace CurrentMetrics
{
extern const Metric TemporaryFilesForJoin;
@ -285,10 +286,12 @@ public:
/// Reports whether the join algorithm `val` may be used with the configured `join_algorithm` list.
/// Returns true when `val` is listed explicitly, or when DEFAULT is listed and `val` is one of
/// the algorithms that DEFAULT implies.
/// NOTE(review): this span reads like a diff hunk rendered without +/- markers - the older and newer
/// forms of the comment and of the DEFAULT check appear back to back; confirm which lines are live.
bool isEnabledAlgorithm(JoinAlgorithm val) const
{
/// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm.
/// When join_algorithm = 'default' (not specified by user) we use [parallel_]hash or direct algorithm.
/// This is the behaviour that ClickHouse originally supported.
bool is_default_enabled = std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::DEFAULT) != join_algorithm.end();
if (is_default_enabled && (val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT))
/// Algorithms that are implicitly enabled whenever DEFAULT is present in join_algorithm.
constexpr auto default_algorithms = std::array<JoinAlgorithm, 4>{
JoinAlgorithm::DEFAULT, JoinAlgorithm::HASH, JoinAlgorithm::PARALLEL_HASH, JoinAlgorithm::DIRECT};
if (is_default_enabled && std::ranges::find(default_algorithms, val) != default_algorithms.end())
return true;
/// Otherwise the algorithm has to be listed explicitly.
return std::find(join_algorithm.begin(), join_algorithm.end(), val) != join_algorithm.end();
}