mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Cleanup logic around join_algorithm setting
This commit is contained in:
parent
466fceb3ee
commit
96bcae419c
@ -355,7 +355,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
|||||||
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
|
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
|
||||||
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||||
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
|
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
|
||||||
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
|
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \
|
||||||
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
|
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
|
||||||
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
|
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
|
||||||
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
|
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
|
||||||
|
@ -31,7 +31,8 @@ IMPLEMENT_SETTING_ENUM(JoinStrictness, ErrorCodes::UNKNOWN_JOIN,
|
|||||||
|
|
||||||
|
|
||||||
IMPLEMENT_SETTING_MULTI_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
|
IMPLEMENT_SETTING_MULTI_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
|
||||||
{{"auto", JoinAlgorithm::AUTO},
|
{{"default", JoinAlgorithm::DEFAULT},
|
||||||
|
{"auto", JoinAlgorithm::AUTO},
|
||||||
{"hash", JoinAlgorithm::HASH},
|
{"hash", JoinAlgorithm::HASH},
|
||||||
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
|
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
|
||||||
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
|
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
|
||||||
|
@ -38,7 +38,8 @@ DECLARE_SETTING_ENUM(JoinStrictness)
|
|||||||
|
|
||||||
enum class JoinAlgorithm
|
enum class JoinAlgorithm
|
||||||
{
|
{
|
||||||
AUTO = 0,
|
DEFAULT = 0,
|
||||||
|
AUTO,
|
||||||
HASH,
|
HASH,
|
||||||
PARTIAL_MERGE,
|
PARTIAL_MERGE,
|
||||||
PREFER_PARTIAL_MERGE,
|
PREFER_PARTIAL_MERGE,
|
||||||
|
@ -89,7 +89,6 @@ namespace ErrorCodes
|
|||||||
extern const int NOT_IMPLEMENTED;
|
extern const int NOT_IMPLEMENTED;
|
||||||
extern const int UNKNOWN_IDENTIFIER;
|
extern const int UNKNOWN_IDENTIFIER;
|
||||||
extern const int UNKNOWN_TYPE_OF_AST_NODE;
|
extern const int UNKNOWN_TYPE_OF_AST_NODE;
|
||||||
extern const int UNSUPPORTED_METHOD;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -1079,34 +1078,58 @@ static ActionsDAGPtr createJoinedBlockActions(ContextPtr context, const TableJoi
|
|||||||
return ExpressionAnalyzer(expression_list, syntax_result, context).getActionsDAG(true, false);
|
return ExpressionAnalyzer(expression_list, syntax_result, context).getActionsDAG(true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block, ContextPtr context)
|
std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block);
|
||||||
{
|
|
||||||
/// HashJoin with Dictionary optimisation
|
|
||||||
if (analyzed_join->tryInitDictJoin(right_sample_block, context))
|
|
||||||
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
|
|
||||||
|
|
||||||
bool allow_merge_join = analyzed_join->allowMergeJoin();
|
static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> analyzed_join, std::unique_ptr<QueryPlan> & joined_plan, ContextPtr context)
|
||||||
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
|
|
||||||
{
|
{
|
||||||
if (analyzed_join->allowParallelHashJoin())
|
Block right_sample_block = joined_plan->getCurrentDataStream().header;
|
||||||
|
|
||||||
|
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT))
|
||||||
{
|
{
|
||||||
return std::make_shared<ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, right_sample_block);
|
if (JoinPtr kvjoin = tryKeyValueJoin(analyzed_join, right_sample_block))
|
||||||
|
{
|
||||||
|
/// Do not need to execute plan for right part
|
||||||
|
joined_plan.reset();
|
||||||
|
return kvjoin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// It's not a hash join actually, that's why we check JoinAlgorithm::DIRECT
|
||||||
|
/// It's would be fixed in https://github.com/ClickHouse/ClickHouse/pull/38956
|
||||||
|
if (analyzed_join->tryInitDictJoin(right_sample_block, context))
|
||||||
|
{
|
||||||
|
joined_plan.reset();
|
||||||
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
|
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
|
||||||
}
|
}
|
||||||
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
|
}
|
||||||
|
|
||||||
|
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PARTIAL_MERGE) ||
|
||||||
|
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE))
|
||||||
{
|
{
|
||||||
|
if (MergeJoin::isSupported(analyzed_join))
|
||||||
return std::make_shared<MergeJoin>(analyzed_join, right_sample_block);
|
return std::make_shared<MergeJoin>(analyzed_join, right_sample_block);
|
||||||
}
|
}
|
||||||
else if (analyzed_join->forceFullSortingMergeJoin())
|
|
||||||
|
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::HASH) ||
|
||||||
|
/// partial_merge is preferred, but can't be used for specified kind of join, fallback to hash
|
||||||
|
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE) ||
|
||||||
|
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PARALLEL_HASH))
|
||||||
{
|
{
|
||||||
if (analyzed_join->getClauses().size() != 1)
|
if (analyzed_join->allowParallelHashJoin())
|
||||||
throw Exception("Full sorting merge join is supported only for single-condition joins", ErrorCodes::NOT_IMPLEMENTED);
|
return std::make_shared<ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, right_sample_block);
|
||||||
if (analyzed_join->isSpecialStorage())
|
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
|
||||||
throw Exception("Full sorting merge join is not supported for special storage", ErrorCodes::NOT_IMPLEMENTED);
|
}
|
||||||
|
|
||||||
|
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE))
|
||||||
|
{
|
||||||
|
if (FullSortingMergeJoin::isSupported(analyzed_join))
|
||||||
return std::make_shared<FullSortingMergeJoin>(analyzed_join, right_sample_block);
|
return std::make_shared<FullSortingMergeJoin>(analyzed_join, right_sample_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::AUTO))
|
||||||
return std::make_shared<JoinSwitcher>(analyzed_join, right_sample_block);
|
return std::make_shared<JoinSwitcher>(analyzed_join, right_sample_block);
|
||||||
|
|
||||||
|
throw Exception("Can't execute any of specified algorithms for specified strictness/kind and right storage type",
|
||||||
|
ErrorCodes::NOT_IMPLEMENTED);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::unique_ptr<QueryPlan> buildJoinedPlan(
|
static std::unique_ptr<QueryPlan> buildJoinedPlan(
|
||||||
@ -1164,27 +1187,26 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
|
|||||||
|
|
||||||
std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block)
|
std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block)
|
||||||
{
|
{
|
||||||
auto error_or_null = [&](const String & msg)
|
if (!analyzed_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT))
|
||||||
{
|
|
||||||
if (analyzed_join->isForcedAlgorithm(JoinAlgorithm::DIRECT))
|
|
||||||
throw DB::Exception(ErrorCodes::UNSUPPORTED_METHOD, "Can't use '{}' join algorithm: {}", JoinAlgorithm::DIRECT, msg);
|
|
||||||
return nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!analyzed_join->isAllowedAlgorithm(JoinAlgorithm::DIRECT))
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
auto storage = analyzed_join->getStorageKeyValue();
|
auto storage = analyzed_join->getStorageKeyValue();
|
||||||
if (!storage)
|
if (!storage)
|
||||||
return error_or_null("unsupported storage");
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isInnerOrLeft(analyzed_join->kind()))
|
if (!isInnerOrLeft(analyzed_join->kind()))
|
||||||
return error_or_null("illegal kind");
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
if (analyzed_join->strictness() != ASTTableJoin::Strictness::All &&
|
if (analyzed_join->strictness() != ASTTableJoin::Strictness::All &&
|
||||||
analyzed_join->strictness() != ASTTableJoin::Strictness::Any &&
|
analyzed_join->strictness() != ASTTableJoin::Strictness::Any &&
|
||||||
analyzed_join->strictness() != ASTTableJoin::Strictness::RightAny)
|
analyzed_join->strictness() != ASTTableJoin::Strictness::RightAny)
|
||||||
return error_or_null("illegal strictness");
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
const auto & clauses = analyzed_join->getClauses();
|
const auto & clauses = analyzed_join->getClauses();
|
||||||
bool only_one_key = clauses.size() == 1 &&
|
bool only_one_key = clauses.size() == 1 &&
|
||||||
@ -1194,15 +1216,16 @@ std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> a
|
|||||||
!clauses[0].on_filter_condition_right;
|
!clauses[0].on_filter_condition_right;
|
||||||
|
|
||||||
if (!only_one_key)
|
if (!only_one_key)
|
||||||
return error_or_null("multiple keys is not allowed");
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
String key_name = clauses[0].key_names_right[0];
|
String key_name = clauses[0].key_names_right[0];
|
||||||
String original_key_name = analyzed_join->getOriginalName(key_name);
|
String original_key_name = analyzed_join->getOriginalName(key_name);
|
||||||
const auto & storage_primary_key = storage->getPrimaryKey();
|
const auto & storage_primary_key = storage->getPrimaryKey();
|
||||||
if (storage_primary_key.size() != 1 || storage_primary_key[0] != original_key_name)
|
if (storage_primary_key.size() != 1 || storage_primary_key[0] != original_key_name)
|
||||||
{
|
{
|
||||||
return error_or_null(fmt::format("key '{}'{} doesn't match storage '{}'",
|
return nullptr;
|
||||||
key_name, (key_name != original_key_name ? " (aka '" + original_key_name + "')" : ""), fmt::join(storage_primary_key, ",")));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_shared<DirectKeyValueJoin>(analyzed_join, right_sample_block, storage);
|
return std::make_shared<DirectKeyValueJoin>(analyzed_join, right_sample_block, storage);
|
||||||
@ -1240,18 +1263,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin(
|
|||||||
joined_plan->addStep(std::move(converting_step));
|
joined_plan->addStep(std::move(converting_step));
|
||||||
}
|
}
|
||||||
|
|
||||||
const Block & right_sample_block = joined_plan->getCurrentDataStream().header;
|
JoinPtr join = chooseJoinAlgorithm(analyzed_join, joined_plan, getContext());
|
||||||
if (JoinPtr kvjoin = tryKeyValueJoin(analyzed_join, right_sample_block))
|
|
||||||
{
|
|
||||||
joined_plan.reset();
|
|
||||||
return kvjoin;
|
|
||||||
}
|
|
||||||
|
|
||||||
JoinPtr join = chooseJoinAlgorithm(analyzed_join, right_sample_block, getContext());
|
|
||||||
|
|
||||||
/// Do not make subquery for join over dictionary.
|
|
||||||
if (analyzed_join->getDictionaryReader())
|
|
||||||
joined_plan.reset();
|
|
||||||
|
|
||||||
return join;
|
return join;
|
||||||
}
|
}
|
||||||
|
@ -34,14 +34,26 @@ public:
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called");
|
||||||
}
|
}
|
||||||
|
|
||||||
void checkTypesOfKeys(const Block & left_block) const override
|
static bool isSupported(const std::shared_ptr<TableJoin> & table_join)
|
||||||
{
|
{
|
||||||
if (table_join->getClauses().size() != 1)
|
if (!table_join->oneDisjunct())
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin supports only one join key");
|
return false;
|
||||||
|
|
||||||
|
bool support_storage = !table_join->isSpecialStorage();
|
||||||
|
|
||||||
|
const auto & on_expr = table_join->getOnlyClause();
|
||||||
|
bool support_conditions = !on_expr.on_filter_condition_left && !on_expr.on_filter_condition_right;
|
||||||
|
|
||||||
/// Key column can change nullability and it's not handled on type conversion stage, so algorithm should be aware of it
|
/// Key column can change nullability and it's not handled on type conversion stage, so algorithm should be aware of it
|
||||||
if (table_join->hasUsing() && table_join->joinUseNulls())
|
bool support_using_and_nulls = !table_join->hasUsing() || !table_join->joinUseNulls();
|
||||||
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "FullSortingMergeJoin doesn't support USING with join_use_nulls");
|
|
||||||
|
return support_conditions && support_using_and_nulls && support_storage;
|
||||||
|
}
|
||||||
|
|
||||||
|
void checkTypesOfKeys(const Block & left_block) const override
|
||||||
|
{
|
||||||
|
if (!isSupported(table_join))
|
||||||
|
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "FullSortingMergeJoin doesn't support specified query");
|
||||||
|
|
||||||
const auto & onexpr = table_join->getOnlyClause();
|
const auto & onexpr = table_join->getOnlyClause();
|
||||||
for (size_t i = 0; i < onexpr.key_names_left.size(); ++i)
|
for (size_t i = 0; i < onexpr.key_names_left.size(); ++i)
|
||||||
|
@ -718,7 +718,7 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample)
|
|||||||
|
|
||||||
bool multiple_disjuncts = !table_join->oneDisjunct();
|
bool multiple_disjuncts = !table_join->oneDisjunct();
|
||||||
/// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any).
|
/// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any).
|
||||||
bool save_key_columns = !table_join->forceHashJoin() || isRightOrFull(kind) || multiple_disjuncts;
|
bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) || isRightOrFull(kind) || multiple_disjuncts;
|
||||||
if (save_key_columns)
|
if (save_key_columns)
|
||||||
{
|
{
|
||||||
saved_block_sample = right_table_keys.cloneEmpty();
|
saved_block_sample = right_table_keys.cloneEmpty();
|
||||||
|
@ -311,7 +311,8 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
|
|||||||
{
|
{
|
||||||
table_join->setStorageJoin(storage_join);
|
table_join->setStorageJoin(storage_join);
|
||||||
}
|
}
|
||||||
else if (auto storage_dict = std::dynamic_pointer_cast<StorageDictionary>(storage); storage_dict)
|
else if (auto storage_dict = std::dynamic_pointer_cast<StorageDictionary>(storage);
|
||||||
|
storage_dict && join_algorithm.isSet(JoinAlgorithm::DIRECT))
|
||||||
{
|
{
|
||||||
table_join->setStorageJoin(storage_dict);
|
table_join->setStorageJoin(storage_dict);
|
||||||
}
|
}
|
||||||
|
@ -1135,6 +1135,20 @@ void MergeJoin::addConditionJoinColumn(Block & block, JoinTableSide block_side)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MergeJoin::isSupported(const std::shared_ptr<TableJoin> & table_join)
|
||||||
|
{
|
||||||
|
auto kind = table_join->kind();
|
||||||
|
auto strictness = table_join->strictness();
|
||||||
|
|
||||||
|
bool is_any = (strictness == ASTTableJoin::Strictness::Any);
|
||||||
|
bool is_all = (strictness == ASTTableJoin::Strictness::All);
|
||||||
|
bool is_semi = (strictness == ASTTableJoin::Strictness::Semi);
|
||||||
|
|
||||||
|
bool all_join = is_all && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind));
|
||||||
|
bool special_left = isInnerOrLeft(kind) && (is_any || is_semi);
|
||||||
|
|
||||||
|
return (all_join || special_left) && table_join->oneDisjunct();
|
||||||
|
}
|
||||||
|
|
||||||
MergeJoin::RightBlockInfo::RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number_, size_t & skip_, RowBitmaps * bitmaps_)
|
MergeJoin::RightBlockInfo::RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number_, size_t & skip_, RowBitmaps * bitmaps_)
|
||||||
: block(block_)
|
: block(block_)
|
||||||
|
@ -37,6 +37,8 @@ public:
|
|||||||
|
|
||||||
std::shared_ptr<NotJoinedBlocks> getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
|
std::shared_ptr<NotJoinedBlocks> getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
|
||||||
|
|
||||||
|
static bool isSupported(const std::shared_ptr<TableJoin> & table_join);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class NotJoinedMerge;
|
friend class NotJoinedMerge;
|
||||||
|
|
||||||
|
@ -363,7 +363,7 @@ void TableJoin::addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, boo
|
|||||||
* For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin,
|
* For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin,
|
||||||
* when part of plan built and types of expression will be known.
|
* when part of plan built and types of expression will be known.
|
||||||
*/
|
*/
|
||||||
inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage(), forceFullSortingMergeJoin());
|
inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE));
|
||||||
|
|
||||||
if (auto it = left_type_map.find(col.name); it != left_type_map.end())
|
if (auto it = left_type_map.find(col.name); it != left_type_map.end())
|
||||||
{
|
{
|
||||||
@ -409,18 +409,6 @@ bool TableJoin::oneDisjunct() const
|
|||||||
return clauses.size() == 1;
|
return clauses.size() == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TableJoin::allowMergeJoin() const
|
|
||||||
{
|
|
||||||
bool is_any = (strictness() == ASTTableJoin::Strictness::Any);
|
|
||||||
bool is_all = (strictness() == ASTTableJoin::Strictness::All);
|
|
||||||
bool is_semi = (strictness() == ASTTableJoin::Strictness::Semi);
|
|
||||||
|
|
||||||
bool all_join = is_all && (isInner(kind()) || isLeft(kind()) || isRight(kind()) || isFull(kind()));
|
|
||||||
bool special_left = isLeft(kind()) && (is_any || is_semi);
|
|
||||||
|
|
||||||
return (all_join || special_left) && oneDisjunct();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TableJoin::needStreamWithNonJoinedRows() const
|
bool TableJoin::needStreamWithNonJoinedRows() const
|
||||||
{
|
{
|
||||||
if (strictness() == ASTTableJoin::Strictness::Asof ||
|
if (strictness() == ASTTableJoin::Strictness::Asof ||
|
||||||
@ -511,7 +499,7 @@ TableJoin::createConvertingActions(
|
|||||||
const ColumnsWithTypeAndName & left_sample_columns,
|
const ColumnsWithTypeAndName & left_sample_columns,
|
||||||
const ColumnsWithTypeAndName & right_sample_columns)
|
const ColumnsWithTypeAndName & right_sample_columns)
|
||||||
{
|
{
|
||||||
inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), forceFullSortingMergeJoin());
|
inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE));
|
||||||
|
|
||||||
NameToNameMap left_key_column_rename;
|
NameToNameMap left_key_column_rename;
|
||||||
NameToNameMap right_key_column_rename;
|
NameToNameMap right_key_column_rename;
|
||||||
|
@ -193,24 +193,20 @@ public:
|
|||||||
bool sameStrictnessAndKind(ASTTableJoin::Strictness, ASTTableJoin::Kind) const;
|
bool sameStrictnessAndKind(ASTTableJoin::Strictness, ASTTableJoin::Kind) const;
|
||||||
const SizeLimits & sizeLimits() const { return size_limits; }
|
const SizeLimits & sizeLimits() const { return size_limits; }
|
||||||
VolumePtr getTemporaryVolume() { return tmp_volume; }
|
VolumePtr getTemporaryVolume() { return tmp_volume; }
|
||||||
bool allowMergeJoin() const;
|
|
||||||
|
|
||||||
bool isAllowedAlgorithm(JoinAlgorithm val) const { return join_algorithm.isSet(val) || join_algorithm.isSet(JoinAlgorithm::AUTO); }
|
bool isEnabledAlgorithm(JoinAlgorithm val) const
|
||||||
bool isForcedAlgorithm(JoinAlgorithm val) const { return join_algorithm == MultiEnum<JoinAlgorithm>(val); }
|
{
|
||||||
|
/// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm.
|
||||||
bool preferMergeJoin() const { return join_algorithm == MultiEnum<JoinAlgorithm>(JoinAlgorithm::PREFER_PARTIAL_MERGE); }
|
/// It's behaviour that was initially supported by clickhouse.
|
||||||
bool forceMergeJoin() const { return join_algorithm == MultiEnum<JoinAlgorithm>(JoinAlgorithm::PARTIAL_MERGE); }
|
bool is_enbaled_by_default = val == JoinAlgorithm::DEFAULT
|
||||||
|
|| val == JoinAlgorithm::HASH
|
||||||
|
|| val == JoinAlgorithm::DIRECT;
|
||||||
|
if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enbaled_by_default)
|
||||||
|
return true;
|
||||||
|
return join_algorithm.isSet(val);
|
||||||
|
}
|
||||||
|
|
||||||
bool allowParallelHashJoin() const;
|
bool allowParallelHashJoin() const;
|
||||||
bool forceFullSortingMergeJoin() const { return !isSpecialStorage() && join_algorithm.isSet(JoinAlgorithm::FULL_SORTING_MERGE); }
|
|
||||||
|
|
||||||
bool forceHashJoin() const
|
|
||||||
{
|
|
||||||
/// HashJoin always used for DictJoin
|
|
||||||
return dictionary_reader
|
|
||||||
|| join_algorithm == MultiEnum<JoinAlgorithm>(JoinAlgorithm::HASH)
|
|
||||||
|| join_algorithm == MultiEnum<JoinAlgorithm>(JoinAlgorithm::PARALLEL_HASH);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool joinUseNulls() const { return join_use_nulls; }
|
bool joinUseNulls() const { return join_use_nulls; }
|
||||||
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
|
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Core/NamesAndTypes.h>
|
#include <Core/NamesAndTypes.h>
|
||||||
|
|
||||||
#include <Common/checkStackSize.h>
|
#include <Common/checkStackSize.h>
|
||||||
|
#include <Core/SettingsEnums.h>
|
||||||
|
|
||||||
#include <Interpreters/TreeRewriter.h>
|
#include <Interpreters/TreeRewriter.h>
|
||||||
#include <Interpreters/LogicalExpressionsOptimizer.h>
|
#include <Interpreters/LogicalExpressionsOptimizer.h>
|
||||||
@ -683,7 +684,7 @@ bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPt
|
|||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!analyzed_join.forceHashJoin())
|
if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH))
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||||
"JOIN ON constant ({}) supported only with join algorithm 'hash'",
|
"JOIN ON constant ({}) supported only with join algorithm 'hash'",
|
||||||
queryToString(on_expression));
|
queryToString(on_expression));
|
||||||
@ -770,7 +771,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
|
|||||||
data.asofToJoinKeys();
|
data.asofToJoinKeys();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!analyzed_join.oneDisjunct() && !analyzed_join.forceHashJoin())
|
if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH))
|
||||||
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section");
|
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,16 +47,16 @@ SELECT '--- totals';
|
|||||||
SELECT rdb.key % 2, sum(k), max(value2) FROM t2 INNER JOIN rdb ON rdb.key == t2.k GROUP BY (rdb.key % 2) WITH TOTALS;
|
SELECT rdb.key % 2, sum(k), max(value2) FROM t2 INNER JOIN rdb ON rdb.key == t2.k GROUP BY (rdb.key % 2) WITH TOTALS;
|
||||||
|
|
||||||
SELECT '---';
|
SELECT '---';
|
||||||
SELECT * FROM t1 RIGHT JOIN rdb ON rdb.key == t1.k; -- { serverError UNSUPPORTED_METHOD }
|
SELECT * FROM t1 RIGHT JOIN rdb ON rdb.key == t1.k; -- { serverError NOT_IMPLEMENTED }
|
||||||
SELECT * FROM t1 RIGHT JOIN rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
SELECT * FROM t1 RIGHT JOIN rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
||||||
|
|
||||||
SELECT * FROM t1 FULL JOIN rdb ON rdb.key == t1.k; -- { serverError UNSUPPORTED_METHOD }
|
SELECT * FROM t1 FULL JOIN rdb ON rdb.key == t1.k; -- { serverError NOT_IMPLEMENTED }
|
||||||
SELECT * FROM t1 FULL JOIN rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
SELECT * FROM t1 FULL JOIN rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
||||||
|
|
||||||
SELECT * FROM t1 INNER JOIN rdb ON rdb.key + 1 == t1.k; -- { serverError UNSUPPORTED_METHOD }
|
SELECT * FROM t1 INNER JOIN rdb ON rdb.key + 1 == t1.k; -- { serverError NOT_IMPLEMENTED }
|
||||||
SELECT * FROM t1 INNER JOIN rdb ON rdb.key + 1 == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
SELECT * FROM t1 INNER JOIN rdb ON rdb.key + 1 == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
||||||
|
|
||||||
SELECT * FROM t1 INNER JOIN (SELECT * FROM rdb) AS rdb ON rdb.key == t1.k; -- { serverError UNSUPPORTED_METHOD }
|
SELECT * FROM t1 INNER JOIN (SELECT * FROM rdb) AS rdb ON rdb.key == t1.k; -- { serverError NOT_IMPLEMENTED }
|
||||||
SELECT * FROM t1 INNER JOIN (SELECT * FROM rdb) AS rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
SELECT * FROM t1 INNER JOIN (SELECT * FROM rdb) AS rdb ON rdb.key == t1.k FORMAT Null SETTINGS join_algorithm = 'direct,hash';
|
||||||
|
|
||||||
DROP TABLE IF EXISTS rdb;
|
DROP TABLE IF EXISTS rdb;
|
||||||
|
Loading…
Reference in New Issue
Block a user