mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #25618 from vdimir/slow-dict-join-fix
Fix unnecessary dictionary re-reads in KeyGetterForDict
This commit is contained in:
commit
1e9e073b0a
@ -204,6 +204,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
|
||||
|
||||
if (table_join->dictionary_reader)
|
||||
{
|
||||
LOG_DEBUG(log, "Performing join over dict");
|
||||
data->type = Type::DICT;
|
||||
std::get<MapsOne>(data->maps).create(Type::DICT);
|
||||
chooseMethod(key_columns, key_sizes); /// init key_sizes
|
||||
@ -319,30 +320,23 @@ public:
|
||||
using Mapped = RowRef;
|
||||
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
|
||||
|
||||
KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes &, void *)
|
||||
: key_columns(key_columns_)
|
||||
{}
|
||||
|
||||
FindResult findKey(const TableJoin & table_join, size_t row, const Arena &)
|
||||
KeyGetterForDict(const TableJoin & table_join, const ColumnRawPtrs & key_columns)
|
||||
{
|
||||
const DictionaryReader & reader = *table_join.dictionary_reader;
|
||||
if (!read_result)
|
||||
{
|
||||
reader.readKeys(*key_columns[0], read_result, found, positions);
|
||||
result.block = &read_result;
|
||||
table_join.dictionary_reader->readKeys(*key_columns[0], read_result, found, positions);
|
||||
|
||||
if (table_join.forceNullableRight())
|
||||
for (auto & column : read_result)
|
||||
for (ColumnWithTypeAndName & column : read_result)
|
||||
if (table_join.rightBecomeNullable(column.type))
|
||||
JoinCommon::convertColumnToNullable(column);
|
||||
}
|
||||
|
||||
FindResult findKey(void *, size_t row, const Arena &)
|
||||
{
|
||||
result.block = &read_result;
|
||||
result.row_num = positions[row];
|
||||
return FindResult(&result, found[row], 0);
|
||||
}
|
||||
|
||||
private:
|
||||
const ColumnRawPtrs & key_columns;
|
||||
Block read_result;
|
||||
Mapped result;
|
||||
ColumnVector<UInt8>::Container found;
|
||||
@ -851,6 +845,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse
|
||||
/// Makes filter (1 if row is present in right table) and returns offsets to replicate (for ALL JOINS).
|
||||
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool need_filter, bool has_null_map>
|
||||
NO_INLINE IColumn::Filter joinRightColumns(
|
||||
KeyGetter && key_getter,
|
||||
const Map & map,
|
||||
AddedColumns & added_columns,
|
||||
const ConstNullMapPtr & null_map [[maybe_unused]],
|
||||
@ -880,8 +875,6 @@ NO_INLINE IColumn::Filter joinRightColumns(
|
||||
if constexpr (need_replication)
|
||||
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);
|
||||
|
||||
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes);
|
||||
|
||||
IColumn::Offset current_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
@ -980,35 +973,51 @@ NO_INLINE IColumn::Filter joinRightColumns(
|
||||
|
||||
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
|
||||
IColumn::Filter joinRightColumnsSwitchNullability(
|
||||
const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
|
||||
KeyGetter && key_getter,
|
||||
const Map & map,
|
||||
AddedColumns & added_columns,
|
||||
const ConstNullMapPtr & null_map,
|
||||
JoinStuff::JoinUsedFlags & used_flags)
|
||||
{
|
||||
if (added_columns.need_filter)
|
||||
{
|
||||
if (null_map)
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(map, added_columns, null_map, used_flags);
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(
|
||||
std::forward<KeyGetter>(key_getter), map, added_columns, null_map, used_flags);
|
||||
else
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(map, added_columns, nullptr, used_flags);
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(
|
||||
std::forward<KeyGetter>(key_getter), map, added_columns, nullptr, used_flags);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (null_map)
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(map, added_columns, null_map, used_flags);
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(
|
||||
std::forward<KeyGetter>(key_getter), map, added_columns, null_map, used_flags);
|
||||
else
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(map, added_columns, nullptr, used_flags);
|
||||
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(
|
||||
std::forward<KeyGetter>(key_getter), map, added_columns, nullptr, used_flags);
|
||||
}
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
IColumn::Filter switchJoinRightColumns(
|
||||
const Maps & maps_, AddedColumns & added_columns, HashJoin::Type type, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
|
||||
const Maps & maps_,
|
||||
AddedColumns & added_columns,
|
||||
HashJoin::Type type,
|
||||
const ConstNullMapPtr & null_map,
|
||||
JoinStuff::JoinUsedFlags & used_flags)
|
||||
{
|
||||
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
|
||||
switch (type)
|
||||
{
|
||||
#define M(TYPE) \
|
||||
case HashJoin::Type::TYPE: \
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS,\
|
||||
typename KeyGetterForType<HashJoin::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type>(\
|
||||
*maps_.TYPE, added_columns, null_map, used_flags);
|
||||
{ \
|
||||
using KeyGetter = typename KeyGetterForType<HashJoin::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type; \
|
||||
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes); \
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetter>( \
|
||||
std::move(key_getter), *maps_.TYPE, added_columns, null_map, used_flags); \
|
||||
}
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
|
||||
@ -1025,8 +1034,12 @@ IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedCo
|
||||
STRICTNESS == ASTTableJoin::Strictness::Semi ||
|
||||
STRICTNESS == ASTTableJoin::Strictness::Anti))
|
||||
{
|
||||
assert(added_columns.key_columns.size() == 1);
|
||||
|
||||
JoinStuff::JoinUsedFlags flags;
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map, flags);
|
||||
KeyGetterForDict key_getter(table_join, added_columns.key_columns);
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(
|
||||
std::move(key_getter), nullptr, added_columns, null_map, flags);
|
||||
}
|
||||
|
||||
throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR);
|
||||
|
37
tests/performance/dict_join.xml
Normal file
37
tests/performance/dict_join.xml
Normal file
@ -0,0 +1,37 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE join_dictionary_source_table (key UInt64, value String)
|
||||
ENGINE = MergeTree ORDER BY key;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY join_hashed_dictionary (key UInt64, value String)
|
||||
PRIMARY KEY key
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'join_dictionary_source_table'))
|
||||
LIFETIME(MIN 0 MAX 1000)
|
||||
LAYOUT(HASHED());
|
||||
</create_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO join_dictionary_source_table
|
||||
SELECT number, toString(number)
|
||||
FROM numbers(1000000);
|
||||
</fill_query>
|
||||
|
||||
<query>
|
||||
SELECT COUNT()
|
||||
FROM join_dictionary_source_table
|
||||
JOIN join_hashed_dictionary
|
||||
ON join_dictionary_source_table.key = join_hashed_dictionary.key;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT COUNT()
|
||||
FROM join_dictionary_source_table
|
||||
JOIN join_hashed_dictionary
|
||||
ON join_dictionary_source_table.key = toUInt64(join_hashed_dictionary.key);
|
||||
</query>
|
||||
|
||||
<drop_query>DROP DICTIONARY IF EXISTS join_hashed_dictionary;</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS join_dictionary_source_table;</drop_query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user