dbms: removing temporary columns early [#METR-18361].

This commit is contained in:
Alexey Milovidov 2015-10-08 06:41:11 +03:00
parent 9f5aa6c26e
commit a62fdf0e24
4 changed files with 95 additions and 37 deletions

View File

@ -386,23 +386,16 @@ public:
throw Exception("Second argument for function '" + getName() + "' must be Set; found " + column_set_ptr->getName(),
ErrorCodes::ILLEGAL_COLUMN);
/// Столбцы, которые проверяются на принадлежность множеству.
ColumnNumbers left_arguments;
Block block_of_key_columns;
/// Первый аргумент может быть tuple или одиночным столбцом.
const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(&*block.getByPosition(arguments[0]).column);
if (tuple)
{
/// Находим в блоке столбцы из tuple.
const Block & tuple_elems = tuple->getData();
size_t tuple_size = tuple_elems.columns();
for (size_t i = 0; i < tuple_size; ++i)
left_arguments.push_back(block.getPositionByName(tuple_elems.getByPosition(i).name));
}
block_of_key_columns = tuple->getData();
else
left_arguments.push_back(arguments[0]);
block_of_key_columns.insert(block.getByPosition(arguments[0]));
column_set->getData()->execute(block, left_arguments, result, negative);
block.getByPosition(result).column = column_set->getData()->execute(block_of_key_columns, negative);
}
};

View File

@ -288,10 +288,10 @@ public:
// Возвращает false, если превышено какое-нибудь ограничение, и больше не нужно вставлять.
bool insertFromBlock(const Block & block, bool create_ordered_set = false);
/** Для указанных столбцов блока проверить принадлежность их значений множеству.
/** For the columns of the block, check whether their values belong to the set.
* The block is not modified; the result is returned as a new column.
*/
void execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const;
ColumnPtr execute(const Block & block, bool negative) const;
std::string describe() const
{

View File

@ -375,7 +375,9 @@ std::string ExpressionAction::toString() const
switch (type)
{
case ADD_COLUMN:
ss << "ADD " << result_name << " " << result_type->getName() << " " << added_column->getName();
ss << "ADD " << result_name << " "
<< (result_type ? result_type->getName() : "(no type)") << " "
<< (added_column ? added_column->getName() : "(no column)");
break;
case REMOVE_COLUMN:
@ -387,7 +389,9 @@ std::string ExpressionAction::toString() const
break;
case APPLY_FUNCTION:
ss << "FUNCTION " << result_name << " " << result_type->getName() << " = " << function->getName() << "(";
ss << "FUNCTION " << result_name << " "
<< (result_type ? result_type->getName() : "(no type)") << " = "
<< (function ? function->getName() : "(no function)") << "(";
for (size_t i = 0; i < argument_names.size(); ++i)
{
if (i)
@ -628,8 +632,6 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum
void ExpressionActions::finalize(const Names & output_columns)
{
// std::cerr << "finalize\n";
NameSet final_columns;
for (size_t i = 0; i < output_columns.size(); ++i)
{
@ -756,13 +758,67 @@ void ExpressionActions::finalize(const Names & output_columns)
}
}
for (int i = static_cast<int>(sample_block.columns()) - 1; i >= 0; --i)
/* std::cerr << "\n";
for (const auto & action : actions)
std::cerr << action.toString() << "\n";
std::cerr << "\n";*/
/// Удаление ненужных временных столбцов.
/// Если у столбца после выполнения функции refcount = 0, то его можно удалить.
std::map<String, int> columns_refcount;
for (const auto & name : final_columns)
++columns_refcount[name];
for (const auto & action : actions)
{
const std::string & name = sample_block.getByPosition(i).name;
if (!final_columns.count(name))
add(ExpressionAction::removeColumn(name));
if (!action.source_name.empty())
++columns_refcount[action.source_name];
for (const auto & name : action.argument_names)
++columns_refcount[name];
for (const auto & name : action.prerequisite_names)
++columns_refcount[name];
}
Actions new_actions;
new_actions.reserve(actions.size());
for (const auto & action : actions)
{
new_actions.push_back(action);
/// Release one reference to column `name` held by the current action.
/// columns_refcount was filled above with one reference per requested output
/// column plus one per use as source / argument / prerequisite of an action,
/// so when the counter drops to zero (or below) no later action and no output
/// needs the column: a REMOVE_COLUMN action is scheduled right after the
/// current action and the column is dropped from sample_block.
auto process = [&] (const String & name)
{
	auto refcount = --columns_refcount[name];
	if (refcount <= 0)
	{
		new_actions.push_back(ExpressionAction::removeColumn(name));
		/// The column may be absent from the sample block already; erase only if present.
		if (sample_block.has(name))
			sample_block.erase(name);
	}
};
if (!action.source_name.empty())
process(action.source_name);
for (const auto & name : action.argument_names)
process(name);
for (const auto & name : action.prerequisite_names)
process(name);
}
actions.swap(new_actions);
/* std::cerr << "\n";
for (const auto & action : actions)
std::cerr << action.toString() << "\n";
std::cerr << "\n";*/
optimize();
checkLimits(sample_block);
}

View File

@ -478,12 +478,17 @@ void Set::createFromAST(DataTypes & types, ASTPtr node, const Context & context,
}
void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const
ColumnPtr Set::execute(const Block & block, bool negative) const
{
ColumnUInt8 * c_res = new ColumnUInt8;
block.getByPosition(result).column = c_res;
ColumnUInt8::Container_t & vec_res = c_res->getData();
vec_res.resize(block.getByPosition(arguments[0]).column->size());
size_t num_key_columns = block.columns();
if (0 == num_key_columns)
throw Exception("Logical error: no columns passed to Set::execute method.", ErrorCodes::LOGICAL_ERROR);
ColumnUInt8 * p_res = new ColumnUInt8;
ColumnPtr res = p_res;
ColumnUInt8::Container_t & vec_res = p_res->getData();
vec_res.resize(block.getByPosition(0).column->size());
Poco::ScopedReadRWLock lock(rwlock);
@ -494,19 +499,19 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
memset(&vec_res[0], 1, vec_res.size());
else
memset(&vec_res[0], 0, vec_res.size());
return;
return res;
}
DataTypeArray * array_type = typeid_cast<DataTypeArray *>(&*block.getByPosition(arguments[0]).type);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(&*block.getByPosition(0).type);
if (array_type)
{
if (data_types.size() != 1 || arguments.size() != 1)
if (data_types.size() != 1 || num_key_columns != 1)
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
if (array_type->getNestedType()->getName() != data_types[0]->getName())
throw Exception(std::string() + "Types in section IN don't match: " + data_types[0]->getName() + " on the right, " + array_type->getNestedType()->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
IColumn * in_column = &*block.getByPosition(arguments[0]).column;
const IColumn * in_column = &*block.getByPosition(0).column;
/// Константный столбец слева от IN поддерживается не напрямую. Для этого, он сначала материализуется.
ColumnPtr materialized_column;
@ -516,24 +521,26 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
in_column = materialized_column.get();
}
if (ColumnArray * col = typeid_cast<ColumnArray *>(in_column))
if (const ColumnArray * col = typeid_cast<const ColumnArray *>(in_column))
executeArray(col, vec_res, negative);
else
throw Exception("Unexpected array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
}
else
{
if (data_types.size() != arguments.size())
if (data_types.size() != num_key_columns)
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
/// Запоминаем столбцы, с которыми будем работать. Также проверим, что типы данных правильные.
ConstColumnPlainPtrs key_columns(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
ConstColumnPlainPtrs key_columns(num_key_columns);
for (size_t i = 0; i < num_key_columns; ++i)
{
key_columns[i] = block.getByPosition(arguments[i]).column;
key_columns[i] = block.getByPosition(i).column;
if (data_types[i]->getName() != block.getByPosition(arguments[i]).type->getName())
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: " + data_types[i]->getName() + " on the right, " + block.getByPosition(arguments[i]).type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
if (data_types[i]->getName() != block.getByPosition(i).type->getName())
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: "
+ data_types[i]->getName() + " on the right, " + block.getByPosition(i).type->getName() + " on the left.",
ErrorCodes::TYPE_MISMATCH);
}
/// Константные столбцы слева от IN поддерживается не напрямую. Для этого, они сначала материализуется.
@ -549,6 +556,8 @@ void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result,
executeOrdinary(key_columns, vec_res, negative);
}
return res;
}