ClickHouse/src/Interpreters/join_common.cpp

189 lines
5.6 KiB
C++
Raw Normal View History

#include <Interpreters/join_common.h>
2019-09-10 18:39:10 +00:00
#include <Columns/ColumnNullable.h>
2019-09-11 18:03:21 +00:00
#include <DataTypes/DataTypeNullable.h>
2019-09-10 18:39:10 +00:00
#include <DataTypes/DataTypeLowCardinality.h>
2019-09-11 18:03:21 +00:00
#include <DataStreams/materializeBlock.h>
2019-09-10 18:39:10 +00:00
namespace DB
{
2019-09-11 18:03:21 +00:00
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
}
namespace JoinCommon
{
/// Wraps the column's type into Nullable and, when the column data is present, wraps the data too.
/// Nothing is changed if the type is already Nullable or cannot be placed inside Nullable.
void convertColumnToNullable(ColumnWithTypeAndName & column)
{
    const bool needs_wrapping = !column.type->isNullable() && column.type->canBeInsideNullable();
    if (needs_wrapping)
    {
        column.type = makeNullable(column.type);

        /// The column data may be absent; in that case only the type is converted.
        if (column.column)
            column.column = makeNullable(column.column);
    }
}
/// Applies convertColumnToNullable to every column of the block located at position `starting_pos` or later.
void convertColumnsToNullable(Block & block, size_t starting_pos)
{
    size_t position = starting_pos;
    while (position < block.columns())
    {
        convertColumnToNullable(block.getByPosition(position));
        ++position;
    }
}
2019-10-29 19:39:42 +00:00
/// Replaces a Nullable type with its nested type and, when the column data is present,
/// replaces the data with the nested column of the ColumnNullable. Non-Nullable types are left as is.
/// @warning It assumes that every NULL has default value in nested column (or it does not matter)
void removeColumnNullability(ColumnWithTypeAndName & column)
{
    if (!column.type->isNullable())
        return;

    /// Copy the nested type pointer first: the reference returned by getNestedType() lives inside the old type.
    auto nested_type = static_cast<const DataTypeNullable &>(*column.type).getNestedType();
    column.type = std::move(nested_type);

    /// The column data may be absent; then only the type is changed.
    if (!column.column)
        return;

    const auto * as_nullable = checkAndGetColumn<ColumnNullable>(*column.column);
    ColumnPtr detached_nested = as_nullable->getNestedColumnPtr();
    column.column = IColumn::mutate(std::move(detached_nested));
}
/// For each name in `names`, replaces the block's column with its full (non-const) form
/// with LowCardinality removed, and returns raw pointers to the resulting columns in the order of `names`.
/// The block keeps ownership of the columns the returned pointers refer to.
ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names)
{
    ColumnRawPtrs raw_columns;
    raw_columns.reserve(names.size());

    for (const auto & name : names)
    {
        auto & entry = block.getByName(name);
        entry.column = recursiveRemoveLowCardinality(entry.column->convertToFullColumnIfConst());
        raw_columns.push_back(entry.column.get());
    }

    return raw_columns;
}
/// For each name in `names`, takes the block's column, converts it to its full (non-const) form,
/// removes LowCardinality and collects the result. The block itself is not modified.
Columns materializeColumns(const Block & block, const Names & names)
{
    Columns result;
    result.reserve(names.size());

    for (const auto & name : names)
    {
        const auto & source = block.getByName(name).column;
        auto full_column = recursiveRemoveLowCardinality(source->convertToFullColumnIfConst());
        result.push_back(std::move(full_column));
    }

    return result;
}
/// Returns the raw pointers held by `columns`, in the same order.
/// The pointers are non-owning: they are obtained with get() from each element of `columns`.
ColumnRawPtrs getRawPointers(const Columns & columns)
{
    const size_t count = columns.size();

    ColumnRawPtrs raw_columns;
    raw_columns.reserve(count);

    for (size_t i = 0; i < count; ++i)
        raw_columns.push_back(columns[i].get());

    return raw_columns;
}
/// Removes LowCardinality from both the data and the type of every column in the block.
void removeLowCardinalityInplace(Block & block)
{
    size_t position = 0;
    while (position < block.columns())
    {
        auto & entry = block.getByPosition(position);
        entry.column = recursiveRemoveLowCardinality(entry.column);
        entry.type = recursiveRemoveLowCardinality(entry.type);
        ++position;
    }
}
void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others)
2019-09-10 18:39:10 +00:00
{
block_others = materializeBlock(sample_block);
2019-09-10 18:39:10 +00:00
for (const String & column_name : key_names)
2019-09-10 18:39:10 +00:00
{
/// Extract right keys with correct keys order. There could be the same key names.
if (!block_keys.has(column_name))
2019-09-10 18:39:10 +00:00
{
auto & col = block_others.getByName(column_name);
block_keys.insert(col);
block_others.erase(column_name);
2019-09-10 18:39:10 +00:00
}
}
}
2019-09-10 18:39:10 +00:00
/// Returns raw pointers to the key columns of `block_keys`, in the order of `key_names`.
/// For a ColumnNullable key the pointer refers to its nested column instead.
ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names)
{
    ColumnRawPtrs key_columns;
    key_columns.reserve(key_names.size());

    for (const String & key_name : key_names)
    {
        const auto * key_column = block_keys.getByName(key_name).column.get();

        /// We will join only keys, where all components are not NULL.
        if (const auto * nullable = checkAndGetColumn<ColumnNullable>(*key_column))
            key_column = &nullable->getNestedColumn();

        key_columns.push_back(key_column);
    }

    return key_columns;
}
2019-09-11 18:03:21 +00:00
/// Checks that the i-th left key and the i-th right key have equal types
/// once LowCardinality and Nullable are stripped from both.
/// The number of checked pairs is the size of `key_names_left`.
/// Throws Exception with code TYPE_MISMATCH on the first pair whose types differ.
void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right)
{
    const size_t keys_size = key_names_left.size();

    for (size_t i = 0; i != keys_size; ++i)
    {
        const auto & left_name = key_names_left[i];
        const auto & right_name = key_names_right[i];

        DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(left_name).type));
        DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(right_name).type));

        if (left_type->equals(*right_type))
            continue;

        throw Exception("Type mismatch of columns to JOIN by: "
            + left_name + " " + left_type->getName() + " at left, "
            + right_name + " " + right_type->getName() + " at right",
            ErrorCodes::TYPE_MISMATCH);
    }
}
2019-09-10 18:39:10 +00:00
/// For every column of the block that has no data, creates a column from the column's type.
/// Columns that already have data are left untouched.
void createMissedColumns(Block & block)
{
    for (size_t position = 0; position < block.columns(); ++position)
    {
        auto & entry = block.getByPosition(position);
        if (entry.column)
            continue;

        entry.column = entry.type->createColumn();
    }
}
2019-09-19 14:53:03 +00:00
/// Appends the totals of the joined side to `block`.
///  - If `totals` is not empty: a copy of it with the columns named in `key_names_right` removed
///    is appended to `block` column by column.
///  - Otherwise: for every column of `columns_to_add`, a one-row full column holding the type's
///    default value is appended under the same name and type.
void joinTotals(const Block & totals, const Block & columns_to_add, const Names & key_names_right, Block & block)
{
    if (Block totals_without_keys = totals)
    {
        for (const auto & name : key_names_right)
        {
            /// There could be the same key names: a repeated name refers to a column that has already
            /// been erased, so erase only while the column is still present.
            if (totals_without_keys.has(name))
                totals_without_keys.erase(name);
        }

        for (size_t i = 0; i < totals_without_keys.columns(); ++i)
            block.insert(totals_without_keys.safeGetByPosition(i));
    }
    else
    {
        /// We will join empty `totals` - from one row with the default values.
        for (size_t i = 0; i < columns_to_add.columns(); ++i)
        {
            const auto & col = columns_to_add.getByPosition(i);
            block.insert({
                col.type->createColumnConstWithDefaultValue(1)->convertToFullColumnIfConst(),
                col.type,
                col.name});
        }
    }
}
2019-09-10 18:39:10 +00:00
}
2019-09-11 18:03:21 +00:00
}