ClickHouse/dbms/src/Interpreters/Join.cpp

1432 lines
52 KiB
C++
Raw Normal View History

2019-07-04 12:12:39 +00:00
#include <any>
#include <common/logger_useful.h>
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNullable.h>
2014-06-12 02:31:30 +00:00
#include <Interpreters/Join.h>
#include <Interpreters/join_common.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/joinDispatch.h>
#include <Interpreters/NullableUtils.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/materializeBlock.h>
#include <Core/ColumnNumbers.h>
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
2018-12-10 10:21:32 +00:00
#include <DataTypes/DataTypeLowCardinality.h>
2014-06-12 02:31:30 +00:00
namespace DB
{
2016-01-12 02:21:15 +00:00
/// Error codes referenced from this translation unit.
/// These are declarations only (`extern`); the definitions live in another translation unit.
/// NOT_IMPLEMENTED and BAD_TYPE_OF_FIELD are thrown by Join::setSampleBlock for unsupported ASOF joins
/// and were previously used without being declared here.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
    extern const int BAD_TYPE_OF_FIELD;
    extern const int UNSUPPORTED_JOIN_KEYS;
    extern const int LOGICAL_ERROR;
    extern const int SET_SIZE_LIMIT_EXCEEDED;
    extern const int TYPE_MISMATCH;
    extern const int ILLEGAL_COLUMN;
}
2019-10-29 19:39:42 +00:00
/// Produces a copy of `src_column` (converted with convertToFullColumnIfConst) that has one row
/// per entry of `filter`, where some rows are replaced by the column's default value.
///  - inverse_filter == false: rows with a non-zero filter entry are copied, the others are blanked;
///  - inverse_filter == true:  rows with a zero filter entry are copied, the others are blanked.
static ColumnPtr filterWithBlanks(ColumnPtr src_column, const IColumn::Filter & filter, bool inverse_filter = false)
{
    ColumnPtr full_column = src_column->convertToFullColumnIfConst();

    MutableColumnPtr result = full_column->cloneEmpty();
    result->reserve(full_column->size());

    for (size_t i = 0; i < filter.size(); ++i)
    {
        /// A row survives when its filter entry disagrees with the inversion flag.
        const bool keep_row = (filter[i] != 0) != inverse_filter;

        if (keep_row)
            result->insertFrom(*full_column, i);
        else
            result->insertDefault();
    }

    return result;
}
/// Forces `column` to the requested nullability and hands it back to the caller.
/// When nullability is removed from a ColumnNullable, rows flagged in its null map
/// are first overwritten with default values.
static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable)
{
    if (nullable)
    {
        JoinCommon::convertColumnToNullable(column);
        return std::move(column);
    }

    /// We have to replace values masked by NULLs with defaults.
    if (column.column)
    {
        const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*column.column);
        if (nullable_column)
            column.column = filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true);
    }

    JoinCommon::removeColumnNullability(column);

    /// `column` is an rvalue-reference parameter, so the explicit move is required to avoid a copy.
    return std::move(column);
}
/// Forces `column` to the requested nullability and hands it back to the caller.
/// If the column ends up Nullable and `negative_null_map` is not empty, the map is applied
/// to the column through ColumnNullable::applyNegatedNullMap.
static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable, const ColumnUInt8 & negative_null_map)
{
    if (!nullable)
    {
        JoinCommon::removeColumnNullability(column);
        return std::move(column);
    }

    JoinCommon::convertColumnToNullable(column);

    if (column.type->isNullable() && negative_null_map.size() != 0)
    {
        /// Take a mutable version of the column, patch its null map, then store it back.
        MutableColumnPtr patched = (*std::move(column.column)).mutate();
        assert_cast<ColumnNullable &>(*patched).applyNegatedNullMap(negative_null_map);
        column.column = std::move(patched);
    }

    /// `column` is an rvalue-reference parameter, so the explicit move is required to avoid a copy.
    return std::move(column);
}
/// Toggles the nullability of a column in place:
/// a ColumnNullable is replaced by its nested column, anything else is wrapped with makeNullable.
static void changeNullability(MutableColumnPtr & mutable_column)
{
    ColumnPtr current = std::move(mutable_column);

    const auto * as_nullable = checkAndGetColumn<ColumnNullable>(*current);
    if (as_nullable == nullptr)
        current = makeNullable(current);
    else
        current = as_nullable->getNestedColumnPtr();

    mutable_column = (*std::move(current)).mutate();
}
/// Builds a Join for the given join description and prepares its structures
/// from the sample block of the right table.
///
/// @param table_join_          Description of the join (kind, strictness, key names, ...). Shared ownership is taken.
/// @param right_sample_block   Sample block of the right table; passed to setSampleBlock().
/// @param any_take_last_row_   Stored as-is and exposed to the row inserters through anyTakeLastRow().
///
/// The pointer parameter is a by-value sink, so it is moved into the member instead of being copied
/// (saves a reference-count increment/decrement). All following initializers read the *member*
/// `table_join`, exactly as before, so they do not depend on the moved-from parameter.
Join::Join(std::shared_ptr<AnalyzedJoin> table_join_, const Block & right_sample_block, bool any_take_last_row_)
    : table_join(std::move(table_join_))
    , kind(table_join->kind())
    , strictness(table_join->strictness())
    , key_names_right(table_join->keyNamesRight())
    , nullable_right_side(table_join->forceNullableRight())
    , nullable_left_side(table_join->forceNullableLeft())
    , any_take_last_row(any_take_last_row_)
    , asof_inequality(table_join->getAsofInequality())
    , data(std::make_shared<RightTableData>())
    , log(&Logger::get("Join"))
{
    setSampleBlock(right_sample_block);
}
Join::Type Join::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes)
2015-03-02 01:10:58 +00:00
{
size_t keys_size = key_columns.size();
if (keys_size == 0)
return Type::CROSS;
bool all_fixed = true;
size_t keys_bytes = 0;
key_sizes.resize(keys_size);
for (size_t j = 0; j < keys_size; ++j)
{
if (!key_columns[j]->isFixedAndContiguous())
{
all_fixed = false;
break;
}
key_sizes[j] = key_columns[j]->sizeOfValueIfFixed();
keys_bytes += key_sizes[j];
}
2017-04-02 17:37:49 +00:00
/// If there is one numeric key that fits in 64 bits
if (keys_size == 1 && key_columns[0]->isNumeric())
{
size_t size_of_field = key_columns[0]->sizeOfValueIfFixed();
if (size_of_field == 1)
return Type::key8;
if (size_of_field == 2)
return Type::key16;
if (size_of_field == 4)
return Type::key32;
if (size_of_field == 8)
return Type::key64;
if (size_of_field == 16)
return Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
}
2017-04-02 17:37:49 +00:00
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
if (all_fixed && keys_bytes <= 16)
return Type::keys128;
if (all_fixed && keys_bytes <= 32)
return Type::keys256;
/// If there is single string key, use hash table of it's values.
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
if (keys_size == 1
&& (typeid_cast<const ColumnString *>(key_columns[0])
|| (isColumnConst(*key_columns[0]) && typeid_cast<const ColumnString *>(&assert_cast<const ColumnConst *>(key_columns[0])->getDataColumn()))))
return Type::key_string;
if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0]))
return Type::key_fixed_string;
/// Otherwise, will use set of cryptographic hashes of unambiguously serialized values.
return Type::hashed;
2015-03-02 01:10:58 +00:00
}
2019-03-28 18:35:50 +00:00
/// Returns the ASOF column, which by convention is the last entry of `key_columns`.
/// The list must not be empty.
static const IColumn * extractAsofColumn(const ColumnRawPtrs & key_columns)
{
    return *key_columns.rbegin();
}
2019-12-02 18:07:27 +00:00
template<typename KeyGetter, bool is_asof_join>
2019-03-28 18:35:50 +00:00
static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes)
{
2019-12-02 18:07:27 +00:00
if constexpr (is_asof_join)
{
2019-03-28 18:35:50 +00:00
auto key_column_copy = key_columns;
auto key_size_copy = key_sizes;
key_column_copy.pop_back();
key_size_copy.pop_back();
return KeyGetter(key_column_copy, key_size_copy, nullptr);
}
2019-03-28 18:35:50 +00:00
else
return KeyGetter(key_columns, key_sizes, nullptr);
}
2019-01-24 14:56:04 +00:00
/// Maps a Join::Type hash table variant to the ColumnsHashing method that extracts keys for it.
/// The primary template is only declared; every supported variant has a specialization below
/// exposing the chosen method as the nested alias `Type`.
/// NOTE(review): the trailing boolean template arguments are options of the ColumnsHashing
/// methods, which are declared outside this file — check their meaning there.
template <Join::Type type, typename Value, typename Mapped>
struct KeyGetterForTypeImpl;
2019-01-24 14:56:04 +00:00
/// key8: one-number method with UInt8 keys.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key8, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false>;
};
/// key16: one-number method with UInt16 keys.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key16, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false>;
};
/// key32: one-number method with UInt32 keys.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key32, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false>;
};
/// key64: one-number method with UInt64 keys.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key64, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false>;
};
/// key_string: string method.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key_string, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false>;
2019-01-24 14:56:04 +00:00
};
/// key_fixed_string: fixed-string method.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key_fixed_string, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false>;
2019-01-24 14:56:04 +00:00
};
/// keys128: fixed-keys method packing the keys into a UInt128.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::keys128, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false>;
};
/// keys256: fixed-keys method packing the keys into a UInt256.
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::keys256, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false>;
};
/// hashed: generic hashed method, used when no other variant applies (see Join::chooseMethod).
template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::hashed, Value, Mapped>
{
using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false>;
};
/// Resolves the key getter type for a hash table variant `type` and a concrete hash table type `Data`.
/// `Data` may be const-qualified; in that case the mapped type handed to the key getter is const as well.
template <Join::Type type, typename Data>
struct KeyGetterForType
{
/// Element type stored in the hash table.
using Value = typename Data::value_type;
/// Mapped (payload) type of the hash table, without constness applied.
using Mapped_t = typename Data::mapped_type;
/// Mapped type with the constness of `Data` propagated to it.
using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>;
/// The key getter selected by KeyGetterForTypeImpl for this variant.
using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
};
2015-03-02 01:10:58 +00:00
/// Remembers the chosen hash table variant and, for every join kind except CROSS,
/// creates the corresponding map inside `data->maps`.
void Join::init(Type type_)
{
    data->type = type_;

    /// CROSS JOIN does not use hash maps, so there is nothing more to set up for it.
    if (kind != ASTTableJoin::Kind::Cross)
    {
        joinDispatchInit(kind, strictness, data->maps);
        joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { map.create(data->type); });
    }
}
size_t Join::getTotalRowCount() const
{
size_t res = 0;
2019-12-19 15:50:28 +00:00
if (data->type == Type::CROSS)
{
2019-12-19 15:50:28 +00:00
for (const auto & block : data->blocks)
res += block.rows();
}
else
{
2019-12-19 15:50:28 +00:00
joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalRowCount(data->type); });
}
return res;
}
size_t Join::getTotalByteCount() const
{
size_t res = 0;
2019-12-19 15:50:28 +00:00
if (data->type == Type::CROSS)
{
2019-12-19 15:50:28 +00:00
for (const auto & block : data->blocks)
res += block.bytes();
}
else
{
2019-12-19 15:50:28 +00:00
joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalByteCountImpl(data->type); });
res += data->pool.size();
}
return res;
}
void Join::setSampleBlock(const Block & block)
{
2020-01-11 09:50:41 +00:00
/// You have to restore this lock if you call the function outside of ctor.
2019-09-09 19:43:37 +00:00
//std::unique_lock lock(rwlock);
LOG_DEBUG(log, "setSampleBlock: " << block.dumpStructure());
if (!empty())
return;
2019-09-11 18:03:21 +00:00
ColumnRawPtrs key_columns = JoinCommon::extractKeysForJoin(key_names_right, block, right_table_keys, sample_block_with_columns_to_add);
initRightBlockStructure();
initRequiredRightKeys();
JoinCommon::createMissedColumns(sample_block_with_columns_to_add);
if (nullable_right_side)
JoinCommon::convertColumnsToNullable(sample_block_with_columns_to_add);
if (strictness == ASTTableJoin::Strictness::Asof)
{
2019-03-28 19:31:11 +00:00
if (kind != ASTTableJoin::Kind::Left and kind != ASTTableJoin::Kind::Inner)
throw Exception("ASOF only supports LEFT and INNER as base joins", ErrorCodes::NOT_IMPLEMENTED);
const IColumn * asof_column = key_columns.back();
size_t asof_size;
2019-04-01 16:44:15 +00:00
asof_type = AsofRowRefs::getTypeSize(asof_column, asof_size);
if (!asof_type)
{
2019-05-02 23:46:04 +00:00
std::string msg = "ASOF join not supported for type: ";
msg += asof_column->getFamilyName();
throw Exception(msg, ErrorCodes::BAD_TYPE_OF_FIELD);
}
key_columns.pop_back();
if (key_columns.empty())
throw Exception("ASOF join cannot be done without a joining column", ErrorCodes::LOGICAL_ERROR);
/// this is going to set up the appropriate hash table for the direct lookup part of the join
/// However, this does not depend on the size of the asof join key (as that goes into the BST)
/// Therefore, add it back in such that it can be extracted appropriately from the full stored
/// key_columns and key_sizes
init(chooseMethod(key_columns, key_sizes));
key_sizes.push_back(asof_size);
}
else
{
/// Choose data structure to use for JOIN.
init(chooseMethod(key_columns, key_sizes));
}
}
namespace
{
2017-04-02 17:37:49 +00:00
/// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN.
template <typename Map, typename KeyGetter>
2019-12-02 18:07:27 +00:00
struct Inserter
{
2019-12-02 18:07:27 +00:00
static ALWAYS_INLINE void insertOne(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
Arena & pool)
{
2019-01-24 14:56:04 +00:00
auto emplace_result = key_getter.emplaceKey(map, i, pool);
if (emplace_result.isInserted() || join.anyTakeLastRow())
2019-01-24 14:56:04 +00:00
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
}
2019-12-02 18:07:27 +00:00
static ALWAYS_INLINE void insertAll(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
{
2019-01-24 14:56:04 +00:00
auto emplace_result = key_getter.emplaceKey(map, i, pool);
2019-01-24 14:56:04 +00:00
if (emplace_result.isInserted())
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
else
{
2019-05-14 14:40:43 +00:00
/// The first element of the list is stored in the value of the hash table, the rest in the pool.
2019-05-14 14:39:03 +00:00
emplace_result.getMapped().insert({stored_block, i}, pool);
}
}
2019-12-02 18:07:27 +00:00
static ALWAYS_INLINE void insertAsof(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool,
const IColumn * asof_column)
{
auto emplace_result = key_getter.emplaceKey(map, i, pool);
typename Map::mapped_type * time_series_map = &emplace_result.getMapped();
if (emplace_result.isInserted())
time_series_map = new (time_series_map) typename Map::mapped_type(join.getAsofType());
time_series_map->insert(join.getAsofType(), asof_column, stored_block, i);
}
};
/// Inserts the first `rows` rows of `stored_block` into `map`, skipping rows flagged in `null_map`
/// when `has_null_map` is set. The insertion strategy is picked at compile time:
/// ASOF structure for ASOF joins, single row per key for the "one" mapped types, list of rows otherwise.
template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
void NO_INLINE insertFromBlockImplTypeCase(
    Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
    const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
{
    using RowInserter = Inserter<Map, KeyGetter>;

    constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
    [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOne> ||
        std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOneFlagged>;

    const IColumn * asof_column [[maybe_unused]] = nullptr;
    if constexpr (is_asof_join)
        asof_column = extractAsofColumn(key_columns);

    auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);

    for (size_t row = 0; row < rows; ++row)
    {
        /// Rows flagged in the null map are never put into the hash table.
        if (has_null_map && (*null_map)[row])
            continue;

        if constexpr (is_asof_join)
            RowInserter::insertAsof(join, map, key_getter, stored_block, row, pool, asof_column);
        else if constexpr (mapped_one)
            RowInserter::insertOne(join, map, key_getter, stored_block, row, pool);
        else
            RowInserter::insertAll(join, map, key_getter, stored_block, row, pool);
    }
}
template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
void insertFromBlockImplType(
2019-04-02 18:50:35 +00:00
Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
2019-01-24 14:56:04 +00:00
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
{
if (null_map)
2019-03-30 01:32:08 +00:00
insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
else
2019-03-30 01:32:08 +00:00
insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
}
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
void insertFromBlockImpl(
2019-04-02 18:50:35 +00:00
Join & join, Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
2019-01-24 14:56:04 +00:00
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
{
switch (type)
{
case Join::Type::EMPTY: break;
case Join::Type::CROSS: break; /// Do nothing. We have already saved block, and it is enough.
#define M(TYPE) \
case Join::Type::TYPE: \
2019-01-24 14:56:04 +00:00
insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<Join::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
2019-03-30 01:32:08 +00:00
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
}
}
}
void Join::initRequiredRightKeys()
2019-03-31 23:09:00 +00:00
{
const Names & left_keys = table_join->keyNamesLeft();
const Names & right_keys = table_join->keyNamesRight();
NameSet required_keys(table_join->requiredRightKeys().begin(), table_join->requiredRightKeys().end());
for (size_t i = 0; i < right_keys.size(); ++i)
2019-03-31 23:09:00 +00:00
{
const String & right_key_name = right_keys[i];
if (required_keys.count(right_key_name) && !required_right_keys.has(right_key_name))
2019-03-31 23:09:00 +00:00
{
const auto & right_key = right_table_keys.getByName(right_key_name);
required_right_keys.insert(right_key);
required_right_keys_sources.push_back(left_keys[i]);
2019-03-31 23:09:00 +00:00
}
}
}
void Join::initRightBlockStructure()
{
2019-12-19 15:50:28 +00:00
auto & saved_block_sample = data->sample_block;
if (isRightOrFull(kind))
2019-03-31 23:09:00 +00:00
{
/// Save keys for NonJoinedBlockInputStream
saved_block_sample = right_table_keys.cloneEmpty();
}
else if (strictness == ASTTableJoin::Strictness::Asof)
{
/// Save ASOF key
saved_block_sample.insert(right_table_keys.safeGetByPosition(right_table_keys.columns() - 1));
}
2019-03-31 23:09:00 +00:00
/// Save non key columns
for (auto & column : sample_block_with_columns_to_add)
saved_block_sample.insert(column);
2019-03-31 23:09:00 +00:00
if (nullable_right_side)
JoinCommon::convertColumnsToNullable(saved_block_sample, (isFull(kind) ? right_table_keys.columns() : 0));
}
2019-03-31 23:09:00 +00:00
/// Builds a block with the same columns, in the same order, as the saved sample block,
/// taking each column from `block` by name and converting it to Nullable
/// when the corresponding sample column is Nullable.
Block Join::structureRightBlock(const Block & block) const
{
    Block result;

    for (const auto & expected : savedBlockSample().getColumnsWithTypeAndName())
    {
        ColumnWithTypeAndName picked = block.getByName(expected.name);

        const bool must_be_nullable = expected.column->isNullable();
        if (must_be_nullable)
            JoinCommon::convertColumnToNullable(picked);

        result.insert(picked);
    }

    return result;
}
bool Join::addJoinedBlock(const Block & source_block)
{
if (empty())
throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR);
/// There's no optimization for right side const columns. Remove constness if any.
Block block = materializeBlock(source_block);
size_t rows = block.rows();
ColumnRawPtrs key_columns = JoinCommon::materializeColumnsInplace(block, key_names_right);
/// We will insert to the map only keys, where all components are not NULL.
ConstNullMapPtr null_map{};
2019-07-03 19:06:34 +00:00
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
2019-12-03 14:30:51 +00:00
/// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream
UInt8 save_nullmap = 0;
if (isRightOrFull(kind) && null_map)
{
2019-12-03 14:30:51 +00:00
for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i)
save_nullmap |= (*null_map)[i];
}
Block structured_block = structureRightBlock(block);
2019-12-03 14:30:51 +00:00
size_t total_rows = 0;
size_t total_bytes = 0;
{
2019-12-19 15:50:28 +00:00
std::unique_lock lock(data->rwlock);
2019-12-19 15:50:28 +00:00
data->blocks.emplace_back(std::move(structured_block));
Block * stored_block = &data->blocks.back();
2019-12-03 14:30:51 +00:00
if (rows)
2019-12-19 15:50:28 +00:00
data->empty = false;
2019-10-12 10:06:07 +00:00
2019-12-03 14:30:51 +00:00
if (kind != ASTTableJoin::Kind::Cross)
{
2019-12-19 15:50:28 +00:00
joinDispatch(kind, strictness, data->maps, [&](auto, auto strictness_, auto & map)
2019-12-03 14:30:51 +00:00
{
2019-12-19 15:50:28 +00:00
insertFromBlockImpl<strictness_>(*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map, data->pool);
2019-12-03 14:30:51 +00:00
});
}
2019-12-03 14:30:51 +00:00
if (save_nullmap)
2019-12-19 15:50:28 +00:00
data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
2019-12-03 14:30:51 +00:00
/// TODO: Do not calculate them every time
total_rows = getTotalRowCount();
total_bytes = getTotalByteCount();
}
2019-07-03 19:06:34 +00:00
2019-12-03 14:30:51 +00:00
return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
2014-06-12 02:31:30 +00:00
}
namespace
{
2019-03-20 14:49:05 +00:00
class AddedColumns
{
public:
using TypeAndNames = std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>>;
2019-03-20 15:15:44 +00:00
AddedColumns(const Block & sample_block_with_columns_to_add,
const Block & block_with_columns_to_add,
2019-03-31 23:09:00 +00:00
const Block & block,
const Block & saved_block_sample,
2019-11-06 19:39:52 +00:00
const ColumnsWithTypeAndName & extras,
const Join & join_,
const ColumnRawPtrs & key_columns_,
2019-11-11 11:42:10 +00:00
const Sizes & key_sizes_)
2019-11-06 19:39:52 +00:00
: join(join_)
, key_columns(key_columns_)
, key_sizes(key_sizes_)
, rows_to_add(block.rows())
, need_filter(false)
2019-03-20 14:49:05 +00:00
{
2019-03-20 15:15:44 +00:00
size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
2019-03-20 15:15:44 +00:00
columns.reserve(num_columns_to_add);
type_name.reserve(num_columns_to_add);
right_indexes.reserve(num_columns_to_add);
2019-03-20 15:15:44 +00:00
for (size_t i = 0; i < num_columns_to_add; ++i)
{
const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i);
/// Don't insert column if it's in left block or not explicitly required.
2019-03-31 23:09:00 +00:00
if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name))
addColumn(src_column);
}
2019-04-01 10:35:37 +00:00
for (auto & extra : extras)
2019-03-31 23:09:00 +00:00
addColumn(extra);
2019-04-01 10:35:37 +00:00
for (auto & tn : type_name)
right_indexes.push_back(saved_block_sample.getPositionByName(tn.second));
2019-03-20 14:49:05 +00:00
}
2019-03-20 15:15:44 +00:00
size_t size() const { return columns.size(); }
2019-03-20 14:49:05 +00:00
ColumnWithTypeAndName moveColumn(size_t i)
{
return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second);
}
2019-11-08 16:13:43 +00:00
template <bool has_defaults>
2019-03-20 14:49:05 +00:00
void appendFromBlock(const Block & block, size_t row_num)
{
2019-11-08 16:13:43 +00:00
if constexpr (has_defaults)
applyLazyDefaults();
2019-03-31 23:09:00 +00:00
for (size_t j = 0; j < right_indexes.size(); ++j)
2019-03-20 14:49:05 +00:00
columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num);
}
2019-03-20 14:49:05 +00:00
void appendDefaultRow()
{
2019-11-06 19:39:52 +00:00
++lazy_defaults_count;
2019-03-20 14:49:05 +00:00
}
2019-11-06 19:39:52 +00:00
void applyLazyDefaults()
{
if (lazy_defaults_count)
{
for (size_t j = 0; j < right_indexes.size(); ++j)
columns[j]->insertManyDefaults(lazy_defaults_count);
lazy_defaults_count = 0;
}
}
const Join & join;
const ColumnRawPtrs & key_columns;
const Sizes & key_sizes;
size_t rows_to_add;
std::unique_ptr<IColumn::Offsets> offsets_to_replicate;
bool need_filter;
2019-11-06 19:39:52 +00:00
private:
TypeAndNames type_name;
MutableColumns columns;
std::vector<size_t> right_indexes;
2019-11-06 19:39:52 +00:00
size_t lazy_defaults_count = 0;
2019-03-31 23:09:00 +00:00
void addColumn(const ColumnWithTypeAndName & src_column)
{
columns.push_back(src_column.column->cloneEmpty());
columns.back()->reserve(src_column.column->size());
type_name.emplace_back(src_column.type, src_column.name);
}
2019-03-20 15:15:44 +00:00
};
2019-11-08 16:13:43 +00:00
template <typename Map, bool add_missing>
2019-11-06 19:39:52 +00:00
void addFoundRowAll(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset)
2019-03-20 14:49:05 +00:00
{
if constexpr (add_missing)
added.applyLazyDefaults();
2019-11-06 19:39:52 +00:00
for (auto it = mapped.begin(); it.ok(); ++it)
2019-03-19 16:53:36 +00:00
{
added.appendFromBlock<false>(*it->block, it->row_num);
2019-11-06 19:39:52 +00:00
++current_offset;
2019-03-19 16:53:36 +00:00
}
2019-03-20 14:49:05 +00:00
};
template <bool add_missing, bool need_offset>
2019-03-20 14:49:05 +00:00
void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]])
{
2019-11-08 16:13:43 +00:00
if constexpr (add_missing)
{
2019-03-20 14:49:05 +00:00
added.appendDefaultRow();
if constexpr (need_offset)
++current_offset;
2019-03-20 14:49:05 +00:00
}
}
2019-03-19 16:53:36 +00:00
/// Marks row `pos` as kept in `filter`. Compiles to nothing when no filter is requested.
template <bool filter_enabled>
void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]])
{
    if constexpr (!filter_enabled)
        return;
    else
        filter[pos] = 1;
}
2019-03-20 14:49:05 +00:00
/// Joins right table columns whose indexes are present in right_indexes, using the specified map.
/// Walks over `added_columns.rows_to_add` left rows, looks each key up in `map` and appends the
/// matching right rows (or deferred default rows) to `added_columns`.
/// Returns the filter (1 for rows marked with setUsed); it is left empty when `need_filter` is false.
/// When replication is needed (see `need_replication` below), the per-row cumulative offsets are
/// stored into `added_columns.offsets_to_replicate`.
/// `null_map` is only dereferenced when `has_null_map` is true.
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool need_filter, bool has_null_map>
2019-11-11 11:42:10 +00:00
NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map [[maybe_unused]])
2019-03-20 14:49:05 +00:00
{
/// Compile-time flags derived from the join strictness and kind.
constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any;
constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
2019-12-02 18:07:27 +00:00
constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi;
constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti;
constexpr bool left = KIND == ASTTableJoin::Kind::Left;
constexpr bool right = KIND == ASTTableJoin::Kind::Right;
2019-12-02 18:07:27 +00:00
constexpr bool full = KIND == ASTTableJoin::Kind::Full;
2019-12-02 18:07:27 +00:00
/// Unmatched left rows produce a default right row for LEFT and FULL joins, except SEMI.
constexpr bool add_missing = (left || full) && !is_semi_join;
/// These combinations may emit a number of rows per left row different from one, so offsets are tracked.
constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right);
2019-11-06 19:39:52 +00:00
size_t rows = added_columns.rows_to_add;
IColumn::Filter filter;
if constexpr (need_filter)
filter = IColumn::Filter(rows, 0);
2019-03-20 14:49:05 +00:00
/// Arena handed to the key getter for lookups.
Arena pool;
2019-11-11 11:42:10 +00:00
if constexpr (need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);
2019-03-28 18:35:50 +00:00
const IColumn * asof_column [[maybe_unused]] = nullptr;
if constexpr (is_asof_join)
2019-11-06 19:39:52 +00:00
asof_column = extractAsofColumn(added_columns.key_columns);
2019-03-28 18:35:50 +00:00
2019-12-02 18:07:27 +00:00
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes);
/// Running count of output rows; written per left row into offsets_to_replicate when replication is needed.
IColumn::Offset current_offset = 0;
2019-03-20 14:49:05 +00:00
for (size_t i = 0; i < rows; ++i)
{
if constexpr (has_null_map)
{
2019-11-11 11:42:10 +00:00
/// A row flagged in the null map is treated as not found without consulting the map.
/// NOTE(review): unlike the not-found branch at the bottom of the loop, this path never marks
/// the row in `filter` for ANTI LEFT joins — confirm that this difference is intended.
if ((*null_map)[i])
{
addNotFoundRow<add_missing, need_replication>(added_columns, current_offset);
2019-11-11 11:42:10 +00:00
if constexpr (need_replication)
(*added_columns.offsets_to_replicate)[i] = current_offset;
continue;
}
2019-03-20 14:49:05 +00:00
}
2019-11-11 11:42:10 +00:00
auto find_result = key_getter.findKey(map, i, pool);
if (find_result.isFound())
2019-03-20 14:49:05 +00:00
{
2019-11-11 11:42:10 +00:00
auto & mapped = find_result.getMapped();
2019-03-20 14:49:05 +00:00
2019-11-11 11:42:10 +00:00
if constexpr (is_asof_join)
{
2019-11-11 11:42:10 +00:00
/// The equality keys matched; now look for a row satisfying the ASOF inequality.
const Join & join = added_columns.join;
if (const RowRef * found = mapped.findAsof(join.getAsofType(), join.getAsofInequality(), asof_column, i))
2019-03-30 21:30:21 +00:00
{
setUsed<need_filter>(filter, i);
2019-11-06 19:39:52 +00:00
mapped.setUsed();
2019-11-11 11:42:10 +00:00
added_columns.appendFromBlock<add_missing>(*found->block, found->row_num);
2019-03-30 21:30:21 +00:00
}
else
addNotFoundRow<add_missing, need_replication>(added_columns, current_offset);
2019-11-11 11:42:10 +00:00
}
else if constexpr (is_all_join)
{
/// ALL: emit every right row stored for this key.
setUsed<need_filter>(filter, i);
2019-11-11 11:42:10 +00:00
mapped.setUsed();
addFoundRowAll<Map, add_missing>(mapped, added_columns, current_offset);
}
2019-12-02 18:07:27 +00:00
else if constexpr ((is_any_join || is_semi_join) && right)
2019-11-11 11:42:10 +00:00
{
2020-01-11 09:50:41 +00:00
/// Use the first left row seen for this key; the left columns need to be replicated.
2019-11-11 11:42:10 +00:00
if (mapped.setUsedOnce())
{
setUsed<need_filter>(filter, i);
2019-11-11 11:42:10 +00:00
addFoundRowAll<Map, add_missing>(mapped, added_columns, current_offset);
}
}
2019-11-11 11:42:10 +00:00
else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Inner)
{
2020-01-11 09:50:41 +00:00
/// Use only the first left row seen for this key.
2019-11-11 11:42:10 +00:00
if (mapped.setUsedOnce())
{
setUsed<need_filter>(filter, i);
2019-11-08 16:13:43 +00:00
added_columns.appendFromBlock<add_missing>(*mapped.block, mapped.row_num);
}
}
2019-12-02 18:07:27 +00:00
else if constexpr (is_any_join && full)
2019-11-11 11:42:10 +00:00
{
/// TODO
}
2019-12-02 18:07:27 +00:00
else if constexpr (is_anti_join)
{
/// ANTI: a matched row is not emitted here; for RIGHT the right row is only marked as used.
if constexpr (right)
mapped.setUsed();
}
else /// ANY LEFT, SEMI LEFT, old ANY (RightAny)
2019-11-11 11:42:10 +00:00
{
setUsed<need_filter>(filter, i);
2019-11-11 11:42:10 +00:00
mapped.setUsed();
added_columns.appendFromBlock<add_missing>(*mapped.block, mapped.row_num);
}
}
2019-11-11 11:42:10 +00:00
else
2019-12-02 18:07:27 +00:00
{
/// Key not found. For ANTI LEFT this is exactly the row to keep.
if constexpr (is_anti_join && left)
setUsed<need_filter>(filter, i);
addNotFoundRow<add_missing, need_replication>(added_columns, current_offset);
}
if constexpr (need_replication)
2019-11-06 19:39:52 +00:00
(*added_columns.offsets_to_replicate)[i] = current_offset;
2019-03-19 16:53:36 +00:00
}
2019-11-06 19:39:52 +00:00
/// Materialize default rows that are still pending after the last left row.
added_columns.applyLazyDefaults();
2019-11-11 11:42:10 +00:00
return filter;
2019-03-20 14:49:05 +00:00
}
2019-03-20 14:49:05 +00:00
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
2019-11-11 11:42:10 +00:00
IColumn::Filter joinRightColumnsSwitchNullability(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
2019-03-20 14:49:05 +00:00
{
if (added_columns.need_filter)
{
if (null_map)
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(map, added_columns, null_map);
else
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(map, added_columns, nullptr);
}
2019-03-20 14:49:05 +00:00
else
{
if (null_map)
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(map, added_columns, null_map);
else
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(map, added_columns, nullptr);
}
2019-03-20 14:49:05 +00:00
}
2019-03-19 16:53:36 +00:00
2019-03-20 14:49:05 +00:00
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
2019-11-11 11:42:10 +00:00
IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_columns, Join::Type type, const ConstNullMapPtr & null_map)
2019-03-20 14:49:05 +00:00
{
switch (type)
{
#define M(TYPE) \
case Join::Type::TYPE: \
2019-11-11 11:42:10 +00:00
return joinRightColumnsSwitchNullability<KIND, STRICTNESS,\
2019-11-06 20:25:27 +00:00
typename KeyGetterForType<Join::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type>(\
2019-11-11 11:42:10 +00:00
*maps_.TYPE, added_columns, null_map);\
2019-11-06 19:39:52 +00:00
break;
2019-03-20 14:49:05 +00:00
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
default:
2019-08-05 14:03:14 +00:00
throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(type)), ErrorCodes::UNSUPPORTED_JOIN_KEYS);
}
}
2019-03-20 14:49:05 +00:00
} /// nameless
/** Joins `block` (the left side) in place for a compile-time fixed KIND / STRICTNESS.
  *
  * Steps, in order:
  *  - materialize the left key columns and extract their null map;
  *  - for RIGHT / FULL, materialize the whole left block (and make it nullable if requested);
  *  - look every left row up in `maps_` and append the produced right columns to `block`;
  *  - depending on the join flavour, filter or replicate the original left columns;
  *  - append the required right key columns, built from the corresponding left columns.
  */
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
void Join::joinBlockImpl(
    Block & block,
    const Names & key_names_left,
    const Block & block_with_columns_to_add,
    const Maps & maps_) const
{
    constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any;
    constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
    constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
    constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi;
    constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti;

    constexpr bool left = KIND == ASTTableJoin::Kind::Left;
    constexpr bool right = KIND == ASTTableJoin::Kind::Right;
    constexpr bool inner = KIND == ASTTableJoin::Kind::Inner;
    constexpr bool full = KIND == ASTTableJoin::Kind::Full;

    /// Replication and filtering are mutually exclusive ways of adjusting the left columns.
    constexpr bool need_replication = is_all_join || (right && (is_any_join || is_semi_join));
    constexpr bool need_filter = !need_replication && (inner || right || (left && (is_semi_join || is_anti_join)));

    /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them.
    Columns key_holders = JoinCommon::materializeColumns(block, key_names_left);
    ColumnRawPtrs key_columns = JoinCommon::getRawPointers(key_holders);

    /// Keys with NULL value in any column won't join to anything.
    ConstNullMapPtr null_map{};
    ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);

    /// Everything at positions [0, num_left_columns) belongs to the left side.
    size_t num_left_columns = block.columns();

    /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
      * Because if they are constants, then in the "not joined" rows, they may have different values
      *  - default values, which can differ from the values of these constants.
      */
    if constexpr (right || full)
    {
        materializeBlockInplace(block);

        if (nullable_left_side)
            JoinCommon::convertColumnsToNullable(block);
    }

    /** For LEFT/INNER JOIN, the saved blocks do not contain keys.
      * For FULL/RIGHT JOIN, the saved blocks contain keys;
      *  but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
      * For ASOF, the last column is used as the ASOF column
      */
    ColumnsWithTypeAndName extras;
    if constexpr (is_asof_join)
        extras.push_back(right_table_keys.getByName(key_names_right.back()));

    AddedColumns added_columns(sample_block_with_columns_to_add, block_with_columns_to_add, block, savedBlockSample(),
                               extras, *this, key_columns, key_sizes);

    /// The filter is also needed when right keys have to be reconstructed below.
    bool has_required_right_keys = (required_right_keys.columns() != 0);
    added_columns.need_filter = need_filter || has_required_right_keys;

    IColumn::Filter row_filter = switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map);

    for (size_t i = 0; i < added_columns.size(); ++i)
        block.insert(added_columns.moveColumn(i));

    std::vector<size_t> right_keys_to_replicate [[maybe_unused]];

    if constexpr (need_filter)
    {
        /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
        for (size_t pos = 0; pos < num_left_columns; ++pos)
        {
            auto & left_column = block.safeGetByPosition(pos).column;
            left_column = left_column->filter(row_filter, -1);
        }

        /// Add join key columns from right block if needed.
        for (size_t key_idx = 0; key_idx < required_right_keys.columns(); ++key_idx)
        {
            const auto & right_key = required_right_keys.getByPosition(key_idx);
            const auto & source_name = required_right_keys_sources[key_idx];

            const auto & source = block.getByName(source_name);
            bool make_nullable = nullable_right_side || right_key.type->isNullable();
            block.insert(correctNullability({source.column, source.type, right_key.name}, make_nullable));
        }
    }
    else if (has_required_right_keys)
    {
        /// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap()
        auto filter_column_holder = ColumnUInt8::create();
        ColumnUInt8 & filter_column = assert_cast<ColumnUInt8 &>(*filter_column_holder);
        filter_column.getData().swap(row_filter);
        const IColumn::Filter & joined_rows = filter_column.getData();

        /// Add join key columns from right block if needed.
        for (size_t key_idx = 0; key_idx < required_right_keys.columns(); ++key_idx)
        {
            const auto & right_key = required_right_keys.getByPosition(key_idx);
            const auto & source_name = required_right_keys_sources[key_idx];

            const auto & source = block.getByName(source_name);
            bool make_nullable = nullable_right_side || right_key.type->isNullable();

            ColumnPtr thin_column = filterWithBlanks(source.column, joined_rows);
            block.insert(correctNullability({thin_column, source.type, right_key.name}, make_nullable, filter_column));

            /// The inserted key still has one value per left row, so it must be replicated too.
            if constexpr (need_replication)
                right_keys_to_replicate.push_back(block.getPositionByName(right_key.name));
        }
    }

    if constexpr (need_replication)
    {
        const IColumn::Offsets & offsets = *added_columns.offsets_to_replicate;

        /// If ALL ... JOIN - we replicate all the columns except the new ones.
        for (size_t pos = 0; pos < num_left_columns; ++pos)
        {
            auto & left_column = block.safeGetByPosition(pos).column;
            left_column = left_column->replicate(offsets);
        }

        /// Replicate additional right keys
        for (size_t pos : right_keys_to_replicate)
        {
            auto & key_column = block.safeGetByPosition(pos).column;
            key_column = key_column->replicate(offsets);
        }
    }
}
2015-07-23 20:23:24 +00:00
/// CROSS JOIN: replaces `block` with the cartesian product of its rows and all rows of the saved right blocks.
/// The result has the columns of `block` followed by the columns of sample_block_with_columns_to_add.
void Join::joinBlockImplCross(Block & block) const
{
    /// Add new columns to the block.

    size_t num_existing_columns = block.columns();
    size_t num_columns_to_add = sample_block_with_columns_to_add.columns();

    size_t rows_left = block.rows();

    ColumnRawPtrs src_left_columns(num_existing_columns);
    MutableColumns dst_columns(num_existing_columns + num_columns_to_add);

    for (size_t i = 0; i < num_existing_columns; ++i)
    {
        src_left_columns[i] = block.getByPosition(i).column.get();
        dst_columns[i] = src_left_columns[i]->cloneEmpty();
    }

    for (size_t i = 0; i < num_columns_to_add; ++i)
    {
        const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.getByPosition(i);
        dst_columns[num_existing_columns + i] = src_column.column->cloneEmpty();
        /// Extend the block's structure so that cloneWithColumns() below accepts the wider column set.
        block.insert(src_column);
    }

    /// NOTE It would be better to use `reserve`, as well as `replicate` methods to duplicate the values of the left block.

    for (size_t i = 0; i < rows_left; ++i)
    {
        for (const Block & block_right : data->blocks)
        {
            size_t rows_right = block_right.rows();

            /// The i-th left row is repeated once per row of the current right block.
            for (size_t col_num = 0; col_num < num_existing_columns; ++col_num)
                for (size_t j = 0; j < rows_right; ++j)
                    dst_columns[col_num]->insertFrom(*src_left_columns[col_num], i);

            /// The whole right block is appended at once: one bulk copy per column
            /// instead of one virtual call per row.
            for (size_t col_num = 0; col_num < num_columns_to_add; ++col_num)
            {
                const IColumn & column_right = *block_right.getByPosition(col_num).column;
                dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right);
            }
        }
    }

    block = block.cloneWithColumns(std::move(dst_columns));
}
/// Compares the types of the first columns of both blocks, ignoring Nullable,
/// and throws TYPE_MISMATCH if they differ.
static void checkTypeOfKey(const Block & block_left, const Block & block_right)
{
    const auto & left_key = block_left.safeGetByPosition(0);
    const auto & right_key = block_right.safeGetByPosition(0);

    auto left_type = removeNullable(left_key.type);
    auto right_type = removeNullable(right_key.type);

    if (left_type->equals(*right_type))
        return;

    throw Exception("Type mismatch of columns to joinGet by: "
        + left_key.name + " " + left_type->getName() + " at left, "
        + right_key.name + " " + right_type->getName() + " at right",
        ErrorCodes::TYPE_MISMATCH);
}
/// Returns the type of `column_name` in sample_block_with_columns_to_add.
/// Throws LOGICAL_ERROR if there is no such column. Holds the shared lock on the join data.
DataTypePtr Join::joinGetReturnType(const String & column_name) const
{
    std::shared_lock lock(data->rwlock);

    if (sample_block_with_columns_to_add.has(column_name))
        return sample_block_with_columns_to_add.getByName(column_name).type;

    throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::LOGICAL_ERROR);
}
/// Runs a LEFT join with RightAny strictness, using the first column of `block` as the key
/// and adding only the column `column_name`.
template <typename Maps>
void Join::joinGetImpl(Block & block, const String & column_name, const Maps & maps_) const
{
    /// The key name is copied before `block` gets modified by the join.
    const Names key_names_left{block.getByPosition(0).name};
    const Block columns_to_add{sample_block_with_columns_to_add.getByName(column_name)};

    joinBlockImpl<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::RightAny>(
        block, key_names_left, columns_to_add, maps_);
}
// TODO: support composite key
2019-01-24 17:12:05 +00:00
// TODO: return multiple columns as named tuple
// TODO: return array of values when strictness == ASTTableJoin::Strictness::All
void Join::joinGet(Block & block, const String & column_name) const
{
2019-12-19 15:50:28 +00:00
std::shared_lock lock(data->rwlock);
if (key_names_right.size() != 1)
throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::LOGICAL_ERROR);
2019-09-11 15:57:09 +00:00
checkTypeOfKey(block, right_table_keys);
if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) &&
kind == ASTTableJoin::Kind::Left)
{
2019-12-19 15:50:28 +00:00
joinGetImpl(block, column_name, std::get<MapsOne>(data->maps));
}
else
throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR);
}
/// Joins `block` in place under the shared lock on the join data.
/// Key types are checked first; then the implementation matching (kind, strictness) is dispatched.
/// CROSS is handled separately; any other unmatched combination is a LOGICAL_ERROR.
void Join::joinBlock(Block & block, Block &)
{
    std::shared_lock lock(data->rwlock);

    const Names & key_names_left = table_join->keyNamesLeft();
    JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right);

    auto join_with_maps = [&](auto kind_, auto strictness_, auto & map)
    {
        joinBlockImpl<kind_, strictness_>(block, key_names_left, sample_block_with_columns_to_add, map);
    };

    /// Joined
    if (joinDispatch(kind, strictness, data->maps, join_with_maps))
        return;

    if (kind != ASTTableJoin::Kind::Cross)
        throw Exception("Logical error: unknown combination of JOIN", ErrorCodes::LOGICAL_ERROR);

    joinBlockImplCross(block);
}
/// Forwards to JoinCommon::joinTotals with this join's stored totals,
/// the sample of added columns and the right key names.
void Join::joinTotals(Block & block) const
{
    JoinCommon::joinTotals(totals, sample_block_with_columns_to_add, key_names_right, block);
}
template <typename Mapped>
2019-12-02 18:07:27 +00:00
struct AdderNonJoined
{
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
{
2019-12-02 18:07:27 +00:00
constexpr bool mapped_asof = std::is_same_v<Mapped, JoinStuff::MappedAsof>;
2019-12-06 15:35:23 +00:00
[[maybe_unused]] constexpr bool mapped_one = std::is_same_v<Mapped, JoinStuff::MappedOne> || std::is_same_v<Mapped, JoinStuff::MappedOneFlagged>;
2019-12-02 18:07:27 +00:00
if constexpr (mapped_asof)
{
2019-12-02 18:07:27 +00:00
/// Do nothing
}
2019-12-02 18:07:27 +00:00
else if constexpr (mapped_one)
{
for (size_t j = 0; j < columns_right.size(); ++j)
{
2019-12-02 18:07:27 +00:00
const auto & mapped_column = mapped.block->getByPosition(j).column;
columns_right[j]->insertFrom(*mapped_column, mapped.row_num);
}
++rows_added;
}
2019-12-02 18:07:27 +00:00
else
{
for (auto it = mapped.begin(); it.ok(); ++it)
{
for (size_t j = 0; j < columns_right.size(); ++j)
{
const auto & mapped_column = it->block->getByPosition(j).column;
columns_right[j]->insertFrom(*mapped_column, it->row_num);
}
2019-12-02 18:07:27 +00:00
++rows_added;
}
}
}
};
2019-12-02 18:07:27 +00:00
2017-04-02 17:37:49 +00:00
/// Stream from not joined earlier rows of the right table.
class NonJoinedBlockInputStream : public IBlockInputStream
{
public:
2019-11-05 20:22:20 +00:00
NonJoinedBlockInputStream(const Join & parent_, const Block & result_sample_block_, UInt64 max_block_size_)
2019-07-03 19:06:34 +00:00
: parent(parent_)
, max_block_size(max_block_size_)
2019-11-05 20:22:20 +00:00
, result_sample_block(materializeBlock(result_sample_block_))
{
bool remap_keys = parent.table_join->hasUsing();
std::unordered_map<size_t, size_t> left_to_right_key_remap;
for (size_t i = 0; i < parent.table_join->keyNamesLeft().size(); ++i)
{
const String & left_key_name = parent.table_join->keyNamesLeft()[i];
const String & right_key_name = parent.table_join->keyNamesRight()[i];
2019-11-05 20:22:20 +00:00
size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
2019-12-19 15:50:28 +00:00
size_t right_key_pos = parent.savedBlockSample().getPositionByName(right_key_name);
if (remap_keys && !parent.required_right_keys.has(right_key_name))
left_to_right_key_remap[left_key_pos] = right_key_pos;
}
2019-11-05 20:22:20 +00:00
/// result_sample_block: left_sample_block + left expressions, right not key columns, required right keys
size_t left_columns_count = result_sample_block.columns() -
parent.sample_block_with_columns_to_add.columns() - parent.required_right_keys.columns();
2019-11-05 20:22:20 +00:00
for (size_t left_pos = 0; left_pos < left_columns_count; ++left_pos)
{
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'.
if (left_to_right_key_remap.count(left_pos))
{
size_t right_key_pos = left_to_right_key_remap[left_pos];
setRightIndex(right_key_pos, left_pos);
}
else
column_indices_left.emplace_back(left_pos);
}
2019-12-19 15:50:28 +00:00
const auto & saved_block_sample = parent.savedBlockSample();
for (size_t right_pos = 0; right_pos < saved_block_sample.columns(); ++right_pos)
{
2019-12-19 15:50:28 +00:00
const String & name = saved_block_sample.getByPosition(right_pos).name;
if (!result_sample_block.has(name))
continue;
size_t result_position = result_sample_block.getPositionByName(name);
/// Don't remap left keys twice. We need only qualified right keys here
2019-11-05 20:22:20 +00:00
if (result_position < left_columns_count)
continue;
setRightIndex(right_pos, result_position);
}
if (column_indices_left.size() + column_indices_right.size() + same_result_keys.size() != result_sample_block.columns())
throw Exception("Error in columns mapping in RIGHT|FULL JOIN. Left: " + toString(column_indices_left.size()) +
", right: " + toString(column_indices_right.size()) +
", same: " + toString(same_result_keys.size()) +
", result: " + toString(result_sample_block.columns()),
ErrorCodes::LOGICAL_ERROR);
}
String getName() const override { return "NonJoined"; }
Block getHeader() const override { return result_sample_block; }
protected:
Block readImpl() override
{
2019-12-19 15:50:28 +00:00
if (parent.data->blocks.empty())
return Block();
2019-07-03 19:06:34 +00:00
return createBlock();
}
private:
const Join & parent;
2019-02-10 16:55:12 +00:00
UInt64 max_block_size;
Block result_sample_block;
2019-11-05 20:22:20 +00:00
/// Indices of columns in result_sample_block that should be generated
std::vector<size_t> column_indices_left;
/// Indices of columns that come from the right-side table: right_pos -> result_pos
std::unordered_map<size_t, size_t> column_indices_right;
///
std::unordered_map<size_t, size_t> same_result_keys;
/// Which right columns (saved in parent) need nullability change before placing them in result block
std::vector<size_t> right_nullability_changes;
2019-07-04 12:12:39 +00:00
std::any position;
std::optional<Join::BlockNullmapList::const_iterator> nulls_position;
void setRightIndex(size_t right_pos, size_t result_position)
{
if (!column_indices_right.count(right_pos))
{
column_indices_right[right_pos] = result_position;
if (hasNullabilityChange(right_pos, result_position))
right_nullability_changes.push_back(right_pos);
}
else
same_result_keys[result_position] = column_indices_right[right_pos];
}
bool hasNullabilityChange(size_t right_pos, size_t result_pos) const
{
2019-12-19 15:50:28 +00:00
const auto & src = parent.savedBlockSample().getByPosition(right_pos).column;
const auto & dst = result_sample_block.getByPosition(result_pos).column;
return src->isNullable() != dst->isNullable();
}
Block createBlock()
{
2019-12-19 15:50:28 +00:00
MutableColumns columns_right = parent.savedBlockSample().cloneEmptyColumns();
size_t rows_added = 0;
2019-07-03 19:06:34 +00:00
auto fill_callback = [&](auto, auto strictness, auto & map)
{
rows_added = fillColumnsFromMap<strictness>(map, columns_right);
2019-07-03 19:06:34 +00:00
};
2019-12-19 15:50:28 +00:00
if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps, fill_callback))
2019-07-03 19:06:34 +00:00
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
fillNullsFromBlocks(columns_right, rows_added);
if (!rows_added)
return {};
for (size_t pos : right_nullability_changes)
changeNullability(columns_right[pos]);
Block res = result_sample_block.cloneEmpty();
/// @note it's possible to make ColumnConst here and materialize it later
for (size_t pos : column_indices_left)
res.getByPosition(pos).column = res.getByPosition(pos).column->cloneResized(rows_added);
for (auto & pr : column_indices_right)
{
auto & right_column = columns_right[pr.first];
auto & result_column = res.getByPosition(pr.second).column;
#ifndef NDEBUG
if (result_column->getName() != right_column->getName())
throw Exception("Wrong columns assign in RIGHT|FULL JOIN: " + result_column->getName() +
" " + right_column->getName(), ErrorCodes::LOGICAL_ERROR);
#endif
result_column = std::move(right_column);
}
for (auto & pr : same_result_keys)
{
auto & src_column = res.getByPosition(pr.second).column;
auto & dst_column = res.getByPosition(pr.first).column;
if (src_column->isNullable() && !dst_column->isNullable())
{
auto * nullable = checkAndGetColumn<ColumnNullable>(*src_column);
dst_column = nullable->getNestedColumnPtr();
}
else if (!src_column->isNullable() && dst_column->isNullable())
dst_column = makeNullable(src_column);
else
dst_column = src_column;
}
return res;
}
2019-07-03 19:06:34 +00:00
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{
2019-12-19 15:50:28 +00:00
switch (parent.data->type)
2019-07-03 19:06:34 +00:00
{
#define M(TYPE) \
case Join::Type::TYPE: \
return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
default:
2019-12-19 15:50:28 +00:00
throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(parent.data->type)),
2019-08-05 14:03:14 +00:00
ErrorCodes::UNSUPPORTED_JOIN_KEYS);
2019-07-03 19:06:34 +00:00
}
__builtin_unreachable();
}
template <ASTTableJoin::Strictness STRICTNESS, typename Map>
size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
{
2019-07-03 19:06:34 +00:00
using Mapped = typename Map::mapped_type;
using Iterator = typename Map::const_iterator;
size_t rows_added = 0;
2019-07-04 12:12:39 +00:00
if (!position.has_value())
position = std::make_any<Iterator>(map.begin());
2019-07-04 12:12:39 +00:00
Iterator & it = std::any_cast<Iterator &>(position);
auto end = map.end();
for (; it != end; ++it)
{
2019-10-29 15:16:51 +00:00
const Mapped & mapped = it->getMapped();
2019-12-02 18:07:27 +00:00
2019-07-03 19:06:34 +00:00
if (mapped.getUsed())
continue;
2019-12-02 18:07:27 +00:00
AdderNonJoined<Mapped>::add(mapped, rows_added, columns_keys_and_right);
if (rows_added >= max_block_size)
{
++it;
break;
}
}
return rows_added;
}
2019-07-03 19:06:34 +00:00
void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
{
2019-07-04 12:12:39 +00:00
if (!nulls_position.has_value())
2019-12-19 15:50:28 +00:00
nulls_position = parent.data->blocks_nullmaps.begin();
2019-07-03 19:06:34 +00:00
2019-12-19 15:50:28 +00:00
auto end = parent.data->blocks_nullmaps.end();
2019-07-03 19:06:34 +00:00
2019-07-04 12:12:39 +00:00
for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it)
2019-07-03 19:06:34 +00:00
{
2019-07-04 12:12:39 +00:00
const Block * block = it->first;
const NullMap & nullmap = assert_cast<const ColumnUInt8 &>(*it->second).getData();
2019-07-03 19:06:34 +00:00
for (size_t row = 0; row < nullmap.size(); ++row)
{
if (nullmap[row])
{
for (size_t col = 0; col < columns_keys_and_right.size(); ++col)
columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row);
++rows_added;
}
}
}
}
};
2019-11-05 20:22:20 +00:00
/// Creates the stream of right-table rows that were not joined,
/// or returns an empty pointer when hasStreamWithNonJoinedRows() is false.
/// The applicability check is delegated so that both functions always agree.
BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
{
    if (!hasStreamWithNonJoinedRows())
        return {};

    return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, max_block_size);
}
2019-12-22 02:44:04 +00:00
/// True for RIGHT or FULL joins, except when the strictness is ASOF or SEMI.
bool Join::hasStreamWithNonJoinedRows() const
{
    const auto join_strictness = table_join->strictness();

    const bool excluded_strictness = join_strictness == ASTTableJoin::Strictness::Asof
        || join_strictness == ASTTableJoin::Strictness::Semi;

    return !excluded_strictness && isRightOrFull(table_join->kind());
}
2014-06-12 02:31:30 +00:00
}