mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
dbms: removing temporary columns early [#METR-18361].
This commit is contained in:
parent
9f5aa6c26e
commit
a62fdf0e24
@ -386,23 +386,16 @@ public:
|
||||
throw Exception("Second argument for function '" + getName() + "' must be Set; found " + column_set_ptr->getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
/// Столбцы, которые проверяются на принадлежность множеству.
|
||||
ColumnNumbers left_arguments;
|
||||
Block block_of_key_columns;
|
||||
|
||||
/// Первый аргумент может быть tuple или одиночным столбцом.
|
||||
const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(&*block.getByPosition(arguments[0]).column);
|
||||
if (tuple)
|
||||
{
|
||||
/// Находим в блоке столбцы из tuple.
|
||||
const Block & tuple_elems = tuple->getData();
|
||||
size_t tuple_size = tuple_elems.columns();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
left_arguments.push_back(block.getPositionByName(tuple_elems.getByPosition(i).name));
|
||||
}
|
||||
block_of_key_columns = tuple->getData();
|
||||
else
|
||||
left_arguments.push_back(arguments[0]);
|
||||
block_of_key_columns.insert(block.getByPosition(arguments[0]));
|
||||
|
||||
column_set->getData()->execute(block, left_arguments, result, negative);
|
||||
block.getByPosition(result).column = column_set->getData()->execute(block_of_key_columns, negative);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -288,10 +288,10 @@ public:
|
||||
// Возвращает false, если превышено какое-нибудь ограничение, и больше не нужно вставлять.
|
||||
bool insertFromBlock(const Block & block, bool create_ordered_set = false);
|
||||
|
||||
/** Для указанных столбцов блока проверить принадлежность их значений множеству.
|
||||
/** Для столбцов блока проверить принадлежность их значений множеству.
|
||||
* Записать результат в столбец в позиции result.
|
||||
*/
|
||||
void execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const;
|
||||
ColumnPtr execute(const Block & block, bool negative) const;
|
||||
|
||||
std::string describe() const
|
||||
{
|
||||
|
@ -375,7 +375,9 @@ std::string ExpressionAction::toString() const
|
||||
switch (type)
|
||||
{
|
||||
case ADD_COLUMN:
|
||||
ss << "ADD " << result_name << " " << result_type->getName() << " " << added_column->getName();
|
||||
ss << "ADD " << result_name << " "
|
||||
<< (result_type ? result_type->getName() : "(no type)") << " "
|
||||
<< (added_column ? added_column->getName() : "(no column)");
|
||||
break;
|
||||
|
||||
case REMOVE_COLUMN:
|
||||
@ -387,7 +389,9 @@ std::string ExpressionAction::toString() const
|
||||
break;
|
||||
|
||||
case APPLY_FUNCTION:
|
||||
ss << "FUNCTION " << result_name << " " << result_type->getName() << " = " << function->getName() << "(";
|
||||
ss << "FUNCTION " << result_name << " "
|
||||
<< (result_type ? result_type->getName() : "(no type)") << " = "
|
||||
<< (function ? function->getName() : "(no function)") << "(";
|
||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||
{
|
||||
if (i)
|
||||
@ -628,8 +632,6 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum
|
||||
|
||||
void ExpressionActions::finalize(const Names & output_columns)
|
||||
{
|
||||
// std::cerr << "finalize\n";
|
||||
|
||||
NameSet final_columns;
|
||||
for (size_t i = 0; i < output_columns.size(); ++i)
|
||||
{
|
||||
@ -756,13 +758,67 @@ void ExpressionActions::finalize(const Names & output_columns)
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = static_cast<int>(sample_block.columns()) - 1; i >= 0; --i)
|
||||
/* std::cerr << "\n";
|
||||
for (const auto & action : actions)
|
||||
std::cerr << action.toString() << "\n";
|
||||
std::cerr << "\n";*/
|
||||
|
||||
/// Удаление ненужных временных столбцов.
|
||||
|
||||
/// Если у столбца после выполнения функции refcount = 0, то его можно удалить.
|
||||
std::map<String, int> columns_refcount;
|
||||
|
||||
for (const auto & name : final_columns)
|
||||
++columns_refcount[name];
|
||||
|
||||
for (const auto & action : actions)
|
||||
{
|
||||
const std::string & name = sample_block.getByPosition(i).name;
|
||||
if (!final_columns.count(name))
|
||||
add(ExpressionAction::removeColumn(name));
|
||||
if (!action.source_name.empty())
|
||||
++columns_refcount[action.source_name];
|
||||
|
||||
for (const auto & name : action.argument_names)
|
||||
++columns_refcount[name];
|
||||
|
||||
for (const auto & name : action.prerequisite_names)
|
||||
++columns_refcount[name];
|
||||
}
|
||||
|
||||
Actions new_actions;
|
||||
new_actions.reserve(actions.size());
|
||||
|
||||
for (const auto & action : actions)
|
||||
{
|
||||
new_actions.push_back(action);
|
||||
|
||||
/// Releases one reference to column `name` held by the action that was just
/// appended to new_actions. Once the reference count drops to zero (or below),
/// a REMOVE_COLUMN action for that column is scheduled right after the current
/// action, and the column is erased from sample_block if it is present there.
auto process = [&] (const String & name)
{
    const auto refcount = --columns_refcount[name];
    if (refcount > 0)
        return;     /// Still referenced by a later action or by the final column set.

    new_actions.push_back(ExpressionAction::removeColumn(name));

    if (sample_block.has(name))
        sample_block.erase(name);
};
|
||||
|
||||
if (!action.source_name.empty())
|
||||
process(action.source_name);
|
||||
|
||||
for (const auto & name : action.argument_names)
|
||||
process(name);
|
||||
|
||||
for (const auto & name : action.prerequisite_names)
|
||||
process(name);
|
||||
}
|
||||
|
||||
actions.swap(new_actions);
|
||||
|
||||
/* std::cerr << "\n";
|
||||
for (const auto & action : actions)
|
||||
std::cerr << action.toString() << "\n";
|
||||
std::cerr << "\n";*/
|
||||
|
||||
optimize();
|
||||
checkLimits(sample_block);
|
||||
}
|
||||
|
@ -478,12 +478,17 @@ void Set::createFromAST(DataTypes & types, ASTPtr node, const Context & context,
|
||||
}
|
||||
|
||||
|
||||
void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const
|
||||
ColumnPtr Set::execute(const Block & block, bool negative) const
|
||||
{
|
||||
ColumnUInt8 * c_res = new ColumnUInt8;
|
||||
block.getByPosition(result).column = c_res;
|
||||
ColumnUInt8::Container_t & vec_res = c_res->getData();
|
||||
vec_res.resize(block.getByPosition(arguments[0]).column->size());
|
||||
size_t num_key_columns = block.columns();
|
||||
|
||||
if (0 == num_key_columns)
|
||||
throw Exception("Logical error: no columns passed to Set::execute method.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ColumnUInt8 * p_res = new ColumnUInt8;
|
||||
ColumnPtr res = p_res;
|
||||
ColumnUInt8::Container_t & vec_res = p_res->getData();
|
||||
vec_res.resize(block.getByPosition(0).column->size());
|
||||
|
||||
Poco::ScopedReadRWLock lock(rwlock);
|
||||
|
||||
@ -494,19 +499,19 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
|
||||
memset(&vec_res[0], 1, vec_res.size());
|
||||
else
|
||||
memset(&vec_res[0], 0, vec_res.size());
|
||||
return;
|
||||
return res;
|
||||
}
|
||||
|
||||
DataTypeArray * array_type = typeid_cast<DataTypeArray *>(&*block.getByPosition(arguments[0]).type);
|
||||
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(&*block.getByPosition(0).type);
|
||||
|
||||
if (array_type)
|
||||
{
|
||||
if (data_types.size() != 1 || arguments.size() != 1)
|
||||
if (data_types.size() != 1 || num_key_columns != 1)
|
||||
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||||
if (array_type->getNestedType()->getName() != data_types[0]->getName())
|
||||
throw Exception(std::string() + "Types in section IN don't match: " + data_types[0]->getName() + " on the right, " + array_type->getNestedType()->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
IColumn * in_column = &*block.getByPosition(arguments[0]).column;
|
||||
const IColumn * in_column = &*block.getByPosition(0).column;
|
||||
|
||||
/// Константный столбец слева от IN поддерживается не напрямую. Для этого, он сначала материализуется.
|
||||
ColumnPtr materialized_column;
|
||||
@ -516,24 +521,26 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
|
||||
in_column = materialized_column.get();
|
||||
}
|
||||
|
||||
if (ColumnArray * col = typeid_cast<ColumnArray *>(in_column))
|
||||
if (const ColumnArray * col = typeid_cast<const ColumnArray *>(in_column))
|
||||
executeArray(col, vec_res, negative);
|
||||
else
|
||||
throw Exception("Unexpected array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (data_types.size() != arguments.size())
|
||||
if (data_types.size() != num_key_columns)
|
||||
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||||
|
||||
/// Запоминаем столбцы, с которыми будем работать. Также проверим, что типы данных правильные.
|
||||
ConstColumnPlainPtrs key_columns(arguments.size());
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
ConstColumnPlainPtrs key_columns(num_key_columns);
|
||||
for (size_t i = 0; i < num_key_columns; ++i)
|
||||
{
|
||||
key_columns[i] = block.getByPosition(arguments[i]).column;
|
||||
key_columns[i] = block.getByPosition(i).column;
|
||||
|
||||
if (data_types[i]->getName() != block.getByPosition(arguments[i]).type->getName())
|
||||
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: " + data_types[i]->getName() + " on the right, " + block.getByPosition(arguments[i]).type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
|
||||
if (data_types[i]->getName() != block.getByPosition(i).type->getName())
|
||||
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: "
|
||||
+ data_types[i]->getName() + " on the right, " + block.getByPosition(i).type->getName() + " on the left.",
|
||||
ErrorCodes::TYPE_MISMATCH);
|
||||
}
|
||||
|
||||
/// Константные столбцы слева от IN поддерживается не напрямую. Для этого, они сначала материализуется.
|
||||
@ -549,6 +556,8 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
|
||||
|
||||
executeOrdinary(key_columns, vec_res, negative);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user