2014-01-04 04:53:07 +00:00
|
|
|
|
#include <DB/Common/ProfileEvents.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
#include <DB/Interpreters/ExpressionActions.h>
|
2014-06-13 02:05:05 +00:00
|
|
|
|
#include <DB/Interpreters/Join.h>
|
2013-05-29 11:46:51 +00:00
|
|
|
|
#include <DB/Columns/ColumnsNumber.h>
|
2013-10-26 19:00:13 +00:00
|
|
|
|
#include <DB/Columns/ColumnArray.h>
|
2013-07-22 16:49:19 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypeNested.h>
|
2013-10-26 19:00:13 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypeArray.h>
|
|
|
|
|
#include <DB/Functions/IFunction.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
#include <set>
|
|
|
|
|
|
2014-01-04 04:53:07 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
namespace DB
|
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
|
|
|
|
/// Returns the names of all columns this action refers to as inputs, in this order:
/// function arguments, prerequisites, ARRAY JOIN-ed columns, projection source names
/// and finally the source column (only if source_name is not empty).
Names ExpressionAction::getNeededColumns() const
{
    Names needed(argument_names.begin(), argument_names.end());

    needed.insert(needed.end(), prerequisite_names.begin(), prerequisite_names.end());
    needed.insert(needed.end(), array_joined_columns.begin(), array_joined_columns.end());

    /// For a projection only the original (first) name is an input; the alias is an output.
    for (size_t i = 0; i < projection.size(); ++i)
        needed.push_back(projection[i].first);

    if (!source_name.empty())
        needed.push_back(source_name);

    return needed;
}
|
2013-08-01 13:29:32 +00:00
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
/** Creates an APPLY_FUNCTION action for the given function and argument column names.
  * If result_name_ is empty, the result name is generated as
  * "<function name>(<arg1>, <arg2>, ...)".
  */
ExpressionAction ExpressionAction::applyFunction(FunctionPtr function_,
    const std::vector<std::string> & argument_names_,
    std::string result_name_)
{
    if (result_name_.empty())
    {
        std::string generated = function_->getName() + "(";

        /// Empty before the first argument, ", " before every following one.
        const char * separator = "";
        for (const auto & argument : argument_names_)
        {
            generated += separator;
            generated += argument;
            separator = ", ";
        }

        generated += ")";
        result_name_ = generated;
    }

    ExpressionAction action;
    action.type = APPLY_FUNCTION;
    action.result_name = result_name_;
    action.function = function_;
    action.argument_names = argument_names_;
    return action;
}
|
2013-05-24 10:49:19 +00:00
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
/** For an APPLY_FUNCTION action: looks up the argument columns in sample_block, asks the function
  * for its return type (stored into result_type) and for its prerequisite actions, and appends the
  * non-empty result names of those prerequisites to prerequisite_names.
  * For every other action type an empty list is returned.
  *
  * Throws DUPLICATE_COLUMN if the result column is already present in sample_block,
  * and UNKNOWN_IDENTIFIER if an argument column is missing from it.
  */
ExpressionActions::Actions ExpressionAction::getPrerequisites(Block & sample_block)
{
    ExpressionActions::Actions prerequisite_actions;

    if (type != APPLY_FUNCTION)
        return prerequisite_actions;

    if (sample_block.has(result_name))
        throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);

    ColumnsWithNameAndType arguments(argument_names.size());
    for (size_t i = 0; i < argument_names.size(); ++i)
    {
        const std::string & argument_name = argument_names[i];

        if (!sample_block.has(argument_name))
            throw Exception("Unknown identifier: '" + argument_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);

        arguments[i] = sample_block.getByName(argument_name);
    }

    function->getReturnTypeAndPrerequisites(arguments, result_type, prerequisite_actions);

    /// Remember the names of the columns produced by the prerequisites.
    for (const auto & prerequisite : prerequisite_actions)
        if (!prerequisite.result_name.empty())
            prerequisite_names.push_back(prerequisite.result_name);

    return prerequisite_actions;
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
|
|
|
|
/** Applies the effect of this action to sample_block, so that afterwards sample_block
  * lists the columns (names and types) that exist once the action has been performed.
  *
  * Throws DUPLICATE_COLUMN when APPLY_FUNCTION / ADD_COLUMN would create a column that already exists,
  * TYPE_MISMATCH when ARRAY_JOIN is applied to a non-array column,
  * and UNKNOWN_ACTION for an unrecognized action type.
  */
void ExpressionAction::prepare(Block & sample_block)
{
//  std::cerr << "preparing: " << toString() << std::endl;

    /** Constant expressions should be evaluated, and the result put into sample_block.
      * For non-constant columns, nullptr should be put into sample_block as the column.
      *
      * The fact that column != nullptr only for constant expressions
      * may be used later during query optimization.
      */

    switch (type)
    {
        case APPLY_FUNCTION:
        {
            if (sample_block.has(result_name))
                throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);

            bool all_const = true;

            ColumnNumbers arguments(argument_names.size());
            for (size_t i = 0; i < argument_names.size(); ++i)
            {
                arguments[i] = sample_block.getPositionByName(argument_names[i]);
                ColumnPtr col = sample_block.getByPosition(arguments[i]).column;
                if (!col || !col->isConst())
                    all_const = false;
            }

            ColumnNumbers prerequisites(prerequisite_names.size());
            for (size_t i = 0; i < prerequisite_names.size(); ++i)
            {
                prerequisites[i] = sample_block.getPositionByName(prerequisite_names[i]);
                ColumnPtr col = sample_block.getByPosition(prerequisites[i]).column;
                if (!col || !col->isConst())
                    all_const = false;
            }

            /// If all arguments and required columns are constants, execute the function right away.
            if (all_const)
            {
                ColumnWithNameAndType new_column;
                new_column.name = result_name;
                new_column.type = result_type;
                sample_block.insert(new_column);

                size_t result_position = sample_block.getPositionByName(result_name);
                function->execute(sample_block, arguments, prerequisites, result_position);

                /// If the result turned out not to be a constant, treat it as unknown just in case.
                ColumnWithNameAndType & col = sample_block.getByPosition(result_position);
                if (!col.column->isConst())
                    col.column = nullptr;
            }
            else
            {
                sample_block.insert(ColumnWithNameAndType(nullptr, result_type, result_name));
            }

            break;
        }

        case ARRAY_JOIN:
        {
            /// Each joined column changes its type from Array(T) to T and becomes non-constant.
            for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
            {
                ColumnWithNameAndType & current = sample_block.getByName(*it);
                const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(&*current.type);
                if (!array_type)
                    throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
                current.type = array_type->getNestedType();
                current.column = nullptr;
            }

            break;
        }

        case JOIN:
        {
            for (const auto & col : columns_added_by_join)
                sample_block.insert(ColumnWithNameAndType(nullptr, col.type, col.name));

            break;
        }

        case PROJECT:
        {
            Block new_block;

            for (size_t i = 0; i < projection.size(); ++i)
            {
                const std::string & name = projection[i].first;
                const std::string & alias = projection[i].second;
                ColumnWithNameAndType column = sample_block.getByName(name);
                if (alias != "")
                    column.name = alias;
                new_block.insert(column);
            }

            sample_block.swap(new_block);
            break;
        }

        case REMOVE_COLUMN:
        {
            sample_block.erase(source_name);
            break;
        }

        case ADD_COLUMN:
        {
            if (sample_block.has(result_name))
                throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);

            sample_block.insert(ColumnWithNameAndType(added_column, result_type, result_name));
            break;
        }

        case COPY_COLUMN:
        {
            /// The copy has the same type (and the same column, if any) as the source.
            result_type = sample_block.getByName(source_name).type;
            sample_block.insert(ColumnWithNameAndType(sample_block.getByName(source_name).column, result_type, result_name));
            break;
        }

        default:
            throw Exception("Unknown action type", ErrorCodes::UNKNOWN_ACTION);
    }
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
/** Performs this action on a block of data.
  *
  * Throws NOT_FOUND_COLUMN_IN_BLOCK when a column the action reads is missing,
  * DUPLICATE_COLUMN when the column it would create already exists,
  * TYPE_MISMATCH / SIZES_OF_ARRAYS_DOESNT_MATCH for invalid ARRAY JOIN input,
  * and UNKNOWN_ACTION for an unrecognized action type.
  */
void ExpressionAction::execute(Block & block) const
{
//  std::cerr << "executing: " << toString() << std::endl;

    /// Checks common to several action types.
    const bool reads_source = (type == REMOVE_COLUMN || type == COPY_COLUMN);
    if (reads_source && !block.has(source_name))
        throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    const bool creates_result = (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION);
    if (creates_result && block.has(result_name))
        throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);

    switch (type)
    {
        case APPLY_FUNCTION:
        {
            /// Translate argument and prerequisite names into positions inside the block.
            ColumnNumbers arguments(argument_names.size());
            for (size_t i = 0; i < argument_names.size(); ++i)
            {
                const std::string & argument_name = argument_names[i];
                if (!block.has(argument_name))
                    throw Exception("Not found column: '" + argument_name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
                arguments[i] = block.getPositionByName(argument_name);
            }

            ColumnNumbers prerequisites(prerequisite_names.size());
            for (size_t i = 0; i < prerequisite_names.size(); ++i)
            {
                const std::string & prerequisite_name = prerequisite_names[i];
                if (!block.has(prerequisite_name))
                    throw Exception("Not found column: '" + prerequisite_name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
                prerequisites[i] = block.getPositionByName(prerequisite_name);
            }

            /// Reserve a slot for the result; the function fills in the column itself.
            ColumnWithNameAndType result_slot;
            result_slot.name = result_name;
            result_slot.type = result_type;
            block.insert(result_slot);

            ProfileEvents::increment(ProfileEvents::FunctionExecute);
            const size_t result_position = block.getPositionByName(result_name);
            function->execute(block, arguments, prerequisites, result_position);

            break;
        }

        case ARRAY_JOIN:
        {
            if (array_joined_columns.empty())
                throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);

            /// One of the joined arrays serves as the reference for offsets.
            const std::string & reference_name = *array_joined_columns.begin();
            ColumnPtr any_array_ptr = block.getByName(reference_name).column;
            if (any_array_ptr->isConst())
                any_array_ptr = dynamic_cast<const IColumnConst &>(*any_array_ptr).convertToFullColumn();

            const ColumnArray * any_array = typeid_cast<const ColumnArray *>(&*any_array_ptr);
            if (!any_array)
                throw Exception("ARRAY JOIN of not array: " + reference_name, ErrorCodes::TYPE_MISMATCH);

            const size_t num_columns = block.columns();
            for (size_t pos = 0; pos < num_columns; ++pos)
            {
                ColumnWithNameAndType & current = block.getByPosition(pos);

                /// Columns that are not joined are replicated according to the reference offsets.
                if (!array_joined_columns.count(current.name))
                {
                    current.column = current.column->replicate(any_array->getOffsets());
                    continue;
                }

                if (!typeid_cast<const DataTypeArray *>(&*current.type))
                    throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);

                ColumnPtr array_ptr = current.column;
                if (array_ptr->isConst())
                    array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();

                const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
                if (!array.hasEqualOffsets(*any_array))
                    throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);

                /// A joined column is replaced by the array's data column and gets the nested type.
                current.column = array.getDataPtr();
                current.type = typeid_cast<const DataTypeArray &>(*current.type).getNestedType();
            }

            break;
        }

        case JOIN:
        {
            join->joinBlock(block);
            break;
        }

        case PROJECT:
        {
            /// Build a new block with only the projected columns, renamed where an alias is given.
            Block projected;

            for (const auto & name_and_alias : projection)
            {
                ColumnWithNameAndType column = block.getByName(name_and_alias.first);
                if (!name_and_alias.second.empty())
                    column.name = name_and_alias.second;
                projected.insert(column);
            }

            block.swap(projected);
            break;
        }

        case REMOVE_COLUMN:
            block.erase(source_name);
            break;

        case ADD_COLUMN:
            /// The stored column is resized to the number of rows in the block.
            block.insert(ColumnWithNameAndType(added_column->cloneResized(block.rows()), result_type, result_name));
            break;

        case COPY_COLUMN:
            block.insert(ColumnWithNameAndType(block.getByName(source_name).column, result_type, result_name));
            break;

        default:
            throw Exception("Unknown action type", ErrorCodes::UNKNOWN_ACTION);
    }
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
std::string ExpressionAction::toString() const
|
2013-05-24 10:49:19 +00:00
|
|
|
|
{
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
switch (type)
|
|
|
|
|
{
|
|
|
|
|
case ADD_COLUMN:
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << "ADD " << result_name << " " << result_type->getName() << " " << added_column->getName();
|
2013-05-24 10:49:19 +00:00
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
case REMOVE_COLUMN:
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << "REMOVE " << source_name;
|
2013-05-24 10:49:19 +00:00
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
case COPY_COLUMN:
|
2014-06-13 06:39:15 +00:00
|
|
|
|
ss << "COPY " << result_name << " = " << source_name;
|
2013-05-24 10:49:19 +00:00
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
case APPLY_FUNCTION:
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << "FUNCTION " << result_name << " " << result_type->getName() << " = " << function->getName() << "(";
|
2013-05-24 10:49:19 +00:00
|
|
|
|
for (size_t i = 0; i < argument_names.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i)
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << ", ";
|
2013-05-24 10:49:19 +00:00
|
|
|
|
ss << argument_names[i];
|
|
|
|
|
}
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << ")";
|
2013-05-24 10:49:19 +00:00
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2013-05-30 16:52:21 +00:00
|
|
|
|
case ARRAY_JOIN:
|
2013-10-17 13:32:32 +00:00
|
|
|
|
ss << "ARRAY JOIN ";
|
2014-06-12 21:14:06 +00:00
|
|
|
|
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
2013-07-26 13:46:52 +00:00
|
|
|
|
{
|
|
|
|
|
if (it != array_joined_columns.begin())
|
|
|
|
|
ss << ", ";
|
2013-08-01 13:29:32 +00:00
|
|
|
|
ss << *it;
|
2013-07-26 13:46:52 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
|
|
|
|
case JOIN:
|
|
|
|
|
ss << "JOIN ";
|
|
|
|
|
for (NamesAndTypesList::const_iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
if (it != columns_added_by_join.begin())
|
|
|
|
|
ss << ", ";
|
2014-07-09 11:45:51 +00:00
|
|
|
|
ss << it->name;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2013-05-28 14:24:20 +00:00
|
|
|
|
case PROJECT:
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << "PROJECT ";
|
2013-05-28 14:24:20 +00:00
|
|
|
|
for (size_t i = 0; i < projection.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i)
|
|
|
|
|
ss << ", ";
|
|
|
|
|
ss << projection[i].first;
|
|
|
|
|
if (projection[i].second != "" && projection[i].second != projection[i].first)
|
2014-06-13 02:05:05 +00:00
|
|
|
|
ss << " AS " << projection[i].second;
|
2013-05-28 14:24:20 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
default:
|
|
|
|
|
throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
return ss.str();
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-05 10:34:59 +00:00
|
|
|
|
/** Checks the block against the limits taken from settings.limits.
  * A limit equal to zero disables the corresponding check.
  *
  * Throws TOO_MUCH_TEMPORARY_COLUMNS if the block has more columns than max_temporary_columns,
  * and TOO_MUCH_TEMPORARY_NON_CONST_COLUMNS if it has more non-constant columns
  * (columns that are present and not const) than max_temporary_non_const_columns.
  */
void ExpressionActions::checkLimits(Block & block) const
{
    const Limits & limits = settings.limits;

    if (limits.max_temporary_columns && block.columns() > limits.max_temporary_columns)
        throw Exception("Too many temporary columns: " + block.dumpNames()
            + ". Maximum: " + toString(limits.max_temporary_columns),
            ErrorCodes::TOO_MUCH_TEMPORARY_COLUMNS);

    if (limits.max_temporary_non_const_columns)
    {
        size_t non_const_columns = 0;
        for (size_t i = 0, size = block.columns(); i < size; ++i)
            if (block.getByPosition(i).column && !block.getByPosition(i).column->isConst())
                ++non_const_columns;

        if (non_const_columns > limits.max_temporary_non_const_columns)
        {
            /// The list is built only on the error path, so the common case stays a single cheap pass.
            std::stringstream list_of_non_const_columns;
            bool first = true;
            for (size_t i = 0, size = block.columns(); i < size; ++i)
            {
                const ColumnWithNameAndType & elem = block.getByPosition(i);

                /// Same predicate as in the counting loop above: skip absent (null) and constant columns.
                if (!elem.column || elem.column->isConst())
                    continue;

                /// Separate by what has been printed so far, not by the position in the block.
                if (!first)
                    list_of_non_const_columns << ", ";
                list_of_non_const_columns << elem.name;
                first = false;
            }

            throw Exception("Too many temporary non-const columns: " + list_of_non_const_columns.str()
                + ". Maximum: " + toString(limits.max_temporary_non_const_columns),
                ErrorCodes::TOO_MUCH_TEMPORARY_NON_CONST_COLUMNS);
        }
    }
}
|
|
|
|
|
|
2013-06-11 16:21:25 +00:00
|
|
|
|
void ExpressionActions::addInput(const ColumnWithNameAndType & column)
|
|
|
|
|
{
|
|
|
|
|
input_columns.push_back(NameAndTypePair(column.name, column.type));
|
|
|
|
|
sample_block.insert(column);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ExpressionActions::addInput(const NameAndTypePair & column)
|
|
|
|
|
{
|
2014-07-09 11:45:51 +00:00
|
|
|
|
addInput(ColumnWithNameAndType(nullptr, column.type, column.name));
|
2013-06-11 16:21:25 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
void ExpressionActions::add(const ExpressionAction & action, Names & out_new_columns)
|
2013-06-11 16:21:25 +00:00
|
|
|
|
{
|
|
|
|
|
NameSet temp_names;
|
|
|
|
|
addImpl(action, temp_names, out_new_columns);
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
void ExpressionActions::add(const ExpressionAction & action)
|
2013-06-04 13:34:46 +00:00
|
|
|
|
{
|
|
|
|
|
NameSet temp_names;
|
2013-06-11 16:21:25 +00:00
|
|
|
|
Names new_names;
|
|
|
|
|
addImpl(action, temp_names, new_names);
|
2013-06-04 13:34:46 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
/** Adds the action and, recursively and before it, all of its prerequisites.
  * Does nothing if sample_block already contains a column named like the action's result.
  *
  * current_names holds the result names of the actions being added up the recursion;
  * meeting the same name again means the prerequisites are cyclic (LOGICAL_ERROR).
  * new_names receives the action's result name (if not empty) and its ARRAY JOIN-ed column names.
  */
void ExpressionActions::addImpl(ExpressionAction action, NameSet & current_names, Names & new_names)
{
    const std::string & result = action.result_name;

    if (sample_block.has(result))
        return;

    /// insert() reports whether the name was new; an already present name means a cycle.
    if (!current_names.insert(result).second)
        throw Exception("Cyclic function prerequisites: " + result, ErrorCodes::LOGICAL_ERROR);

    if (!result.empty())
        new_names.push_back(result);

    new_names.insert(new_names.end(), action.array_joined_columns.begin(), action.array_joined_columns.end());

    /// Prerequisites must be added before the action that depends on them.
    Actions prerequisites = action.getPrerequisites(sample_block);
    for (const auto & prerequisite : prerequisites)
        addImpl(prerequisite, current_names, new_names);

    action.prepare(sample_block);
    actions.push_back(action);

    current_names.erase(result);
}
|
|
|
|
|
|
2013-05-28 14:47:37 +00:00
|
|
|
|
void ExpressionActions::prependProjectInput()
|
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
actions.insert(actions.begin(), ExpressionAction::project(getRequiredColumns()));
|
2013-05-28 14:47:37 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
void ExpressionActions::prependArrayJoin(const ExpressionAction & action, const Block & sample_block)
|
2014-04-30 19:19:29 +00:00
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (action.type != ExpressionAction::ARRAY_JOIN)
|
2014-04-30 19:19:29 +00:00
|
|
|
|
throw Exception("ARRAY_JOIN action expected", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
|
|
NameSet array_join_set(action.array_joined_columns.begin(), action.array_joined_columns.end());
|
|
|
|
|
for (auto & it : input_columns)
|
|
|
|
|
{
|
2014-07-09 11:45:51 +00:00
|
|
|
|
if (array_join_set.count(it.name))
|
2014-04-30 19:19:29 +00:00
|
|
|
|
{
|
2014-07-09 11:45:51 +00:00
|
|
|
|
array_join_set.erase(it.name);
|
|
|
|
|
it.type = new DataTypeArray(it.type);
|
2014-04-30 19:19:29 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (const std::string & name : array_join_set)
|
|
|
|
|
{
|
|
|
|
|
input_columns.push_back(NameAndTypePair(name, sample_block.getByName(name).type));
|
2014-06-12 18:41:09 +00:00
|
|
|
|
actions.insert(actions.begin(), ExpressionAction::removeColumn(name));
|
2014-04-30 19:19:29 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
actions.insert(actions.begin(), action);
|
|
|
|
|
optimizeArrayJoin();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
bool ExpressionActions::popUnusedArrayJoin(const Names & required_columns, ExpressionAction & out_action)
|
2014-04-30 19:19:29 +00:00
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (actions.empty() || actions.back().type != ExpressionAction::ARRAY_JOIN)
|
2014-04-30 19:19:29 +00:00
|
|
|
|
return false;
|
|
|
|
|
NameSet required_set(required_columns.begin(), required_columns.end());
|
|
|
|
|
for (const std::string & name : actions.back().array_joined_columns)
|
|
|
|
|
{
|
|
|
|
|
if (required_set.count(name))
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
for (const std::string & name : actions.back().array_joined_columns)
|
|
|
|
|
{
|
|
|
|
|
DataTypePtr & type = sample_block.getByName(name).type;
|
|
|
|
|
type = new DataTypeArray(type);
|
|
|
|
|
}
|
|
|
|
|
out_action = actions.back();
|
|
|
|
|
actions.pop_back();
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-05 10:34:59 +00:00
|
|
|
|
/// Executes all actions on the block in order, checking the limits after each one.
void ExpressionActions::execute(Block & block) const
{
    for (size_t i = 0; i < actions.size(); ++i)
    {
        actions[i].execute(block);
        checkLimits(block);
    }
}
|
|
|
|
|
|
2013-06-20 13:50:55 +00:00
|
|
|
|
std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns)
|
2013-06-10 14:24:40 +00:00
|
|
|
|
{
|
|
|
|
|
NamesAndTypesList::const_iterator it = columns.begin();
|
2014-06-13 09:07:20 +00:00
|
|
|
|
if (it == columns.end())
|
|
|
|
|
throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR);
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2014-07-09 11:45:51 +00:00
|
|
|
|
size_t min_size = it->type->isNumeric() ? it->type->getSizeOfField() : 100;
|
|
|
|
|
String res = it->name;
|
2013-06-10 14:24:40 +00:00
|
|
|
|
for (; it != columns.end(); ++it)
|
|
|
|
|
{
|
2014-07-09 11:45:51 +00:00
|
|
|
|
size_t current_size = it->type->isNumeric() ? it->type->getSizeOfField() : 100;
|
2013-06-10 14:24:40 +00:00
|
|
|
|
if (current_size < min_size)
|
|
|
|
|
{
|
|
|
|
|
min_size = current_size;
|
2014-07-09 11:45:51 +00:00
|
|
|
|
res = it->name;
|
2013-06-10 14:24:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 14:24:40 +00:00
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-28 11:54:37 +00:00
|
|
|
|
void ExpressionActions::finalize(const Names & output_columns)
|
2013-05-24 10:49:19 +00:00
|
|
|
|
{
|
2013-05-27 14:02:55 +00:00
|
|
|
|
NameSet final_columns;
|
|
|
|
|
for (size_t i = 0; i < output_columns.size(); ++i)
|
|
|
|
|
{
|
2013-05-28 11:54:37 +00:00
|
|
|
|
const std::string name = output_columns[i];
|
2013-05-27 14:02:55 +00:00
|
|
|
|
if (!sample_block.has(name))
|
2013-05-28 14:24:20 +00:00
|
|
|
|
throw Exception("Unknown column: " + name + ", there are only columns "
|
|
|
|
|
+ sample_block.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
2013-05-28 11:54:37 +00:00
|
|
|
|
final_columns.insert(name);
|
2013-05-27 14:02:55 +00:00
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
/// Какие столбцы нужны, чтобы выполнить действия от текущего до последнего.
|
|
|
|
|
NameSet needed_columns = final_columns;
|
|
|
|
|
/// Какие столбцы никто не будет трогать от текущего действия до последнего.
|
|
|
|
|
NameSet unmodified_columns;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
{
|
|
|
|
|
NamesAndTypesList sample_columns = sample_block.getColumnsList();
|
|
|
|
|
for (NamesAndTypesList::iterator it = sample_columns.begin(); it != sample_columns.end(); ++it)
|
2014-07-09 11:45:51 +00:00
|
|
|
|
unmodified_columns.insert(it->name);
|
2013-06-18 09:43:35 +00:00
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
/// Будем идти с конца и поодерживать множество нужных на данном этапе столбцов.
|
|
|
|
|
/// Будем выбрасывать ненужные действия, хотя обычно их нет по построению.
|
|
|
|
|
for (int i = static_cast<int>(actions.size()) - 1; i >= 0; --i)
|
2013-05-24 10:49:19 +00:00
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
ExpressionAction & action = actions[i];
|
2013-06-18 09:43:35 +00:00
|
|
|
|
Names in = action.getNeededColumns();
|
2013-10-17 13:32:32 +00:00
|
|
|
|
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (action.type == ExpressionAction::PROJECT)
|
2013-06-18 09:43:35 +00:00
|
|
|
|
{
|
|
|
|
|
needed_columns = NameSet(in.begin(), in.end());
|
|
|
|
|
unmodified_columns.clear();
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
else if (action.type == ExpressionAction::ARRAY_JOIN)
|
2013-10-17 13:32:32 +00:00
|
|
|
|
{
|
|
|
|
|
/// Не будем ARRAY JOIN-ить столбцы, которые дальше не используются.
|
|
|
|
|
/// Обычно такие столбцы не используются и до ARRAY JOIN, и поэтому выбрасываются дальше в этой функции.
|
|
|
|
|
/// Не будем убирать все столбцы, чтобы не потерять количество строк.
|
2014-04-30 19:19:29 +00:00
|
|
|
|
for (auto it = action.array_joined_columns.begin(); it != action.array_joined_columns.end();)
|
2013-10-17 13:32:32 +00:00
|
|
|
|
{
|
2014-04-30 19:19:29 +00:00
|
|
|
|
bool need = needed_columns.count(*it);
|
|
|
|
|
if (!need && action.array_joined_columns.size() > 1)
|
2013-10-17 13:32:32 +00:00
|
|
|
|
{
|
2014-04-30 19:19:29 +00:00
|
|
|
|
action.array_joined_columns.erase(it++);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
needed_columns.insert(*it);
|
|
|
|
|
unmodified_columns.erase(*it);
|
|
|
|
|
|
|
|
|
|
/// Если никакие результаты ARRAY JOIN не используются, принудительно оставим на выходе произвольный столбец,
|
|
|
|
|
/// чтобы не потерять количество строк.
|
|
|
|
|
if (!need)
|
|
|
|
|
final_columns.insert(*it);
|
|
|
|
|
|
|
|
|
|
++it;
|
2013-10-17 13:32:32 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-18 09:43:35 +00:00
|
|
|
|
else
|
|
|
|
|
{
|
2013-10-17 13:32:32 +00:00
|
|
|
|
std::string out = action.result_name;
|
2013-06-18 09:43:35 +00:00
|
|
|
|
if (!out.empty())
|
|
|
|
|
{
|
|
|
|
|
/// Если результат не используется и нет побочных эффектов, выбросим действие.
|
|
|
|
|
if (!needed_columns.count(out) &&
|
2014-06-12 18:41:09 +00:00
|
|
|
|
(action.type == ExpressionAction::APPLY_FUNCTION
|
|
|
|
|
|| action.type == ExpressionAction::ADD_COLUMN
|
|
|
|
|
|| action.type == ExpressionAction::COPY_COLUMN))
|
2013-06-18 09:43:35 +00:00
|
|
|
|
{
|
|
|
|
|
actions.erase(actions.begin() + i);
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
if (unmodified_columns.count(out))
|
|
|
|
|
{
|
|
|
|
|
sample_block.erase(out);
|
|
|
|
|
unmodified_columns.erase(out);
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
continue;
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
unmodified_columns.erase(out);
|
|
|
|
|
needed_columns.erase(out);
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-18 09:43:35 +00:00
|
|
|
|
needed_columns.insert(in.begin(), in.end());
|
|
|
|
|
}
|
2013-05-24 10:49:19 +00:00
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-20 14:49:41 +00:00
|
|
|
|
/// Не будем выбрасывать все входные столбцы, чтобы не потерять количество строк в блоке.
|
|
|
|
|
if (needed_columns.empty() && !input_columns.empty())
|
|
|
|
|
needed_columns.insert(getSmallestColumn(input_columns));
|
2014-04-30 19:19:29 +00:00
|
|
|
|
|
|
|
|
|
/// Не будем оставлять блок пустым, чтобы не потерять количество строк в нем.
|
|
|
|
|
if (final_columns.empty())
|
|
|
|
|
final_columns.insert(getSmallestColumn(input_columns));
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();)
|
|
|
|
|
{
|
|
|
|
|
NamesAndTypesList::iterator it0 = it;
|
|
|
|
|
++it;
|
2014-07-09 11:45:51 +00:00
|
|
|
|
if (!needed_columns.count(it0->name))
|
2013-05-24 10:49:19 +00:00
|
|
|
|
{
|
2014-07-09 11:45:51 +00:00
|
|
|
|
if (unmodified_columns.count(it0->name))
|
|
|
|
|
sample_block.erase(it0->name);
|
2013-05-27 14:02:55 +00:00
|
|
|
|
input_columns.erase(it0);
|
2013-05-24 10:49:19 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
for (int i = static_cast<int>(sample_block.columns()) - 1; i >= 0; --i)
|
|
|
|
|
{
|
|
|
|
|
const std::string & name = sample_block.getByPosition(i).name;
|
2013-06-10 14:24:40 +00:00
|
|
|
|
if (!final_columns.count(name))
|
2014-06-12 18:41:09 +00:00
|
|
|
|
add(ExpressionAction::removeColumn(name));
|
2013-05-24 10:49:19 +00:00
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
optimize();
|
2013-06-14 17:53:40 +00:00
|
|
|
|
checkLimits(sample_block);
|
2013-05-24 10:49:19 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-08-22 19:51:55 +00:00
|
|
|
|
|
2013-05-30 16:52:21 +00:00
|
|
|
|
std::string ExpressionActions::getID() const
|
|
|
|
|
{
|
|
|
|
|
std::stringstream ss;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-30 16:52:21 +00:00
|
|
|
|
for (size_t i = 0; i < actions.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i)
|
|
|
|
|
ss << ", ";
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (actions[i].type == ExpressionAction::APPLY_FUNCTION)
|
2013-05-30 16:52:21 +00:00
|
|
|
|
ss << actions[i].result_name;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (actions[i].type == ExpressionAction::ARRAY_JOIN)
|
2013-07-26 13:46:52 +00:00
|
|
|
|
{
|
2013-08-01 15:28:10 +00:00
|
|
|
|
ss << "{";
|
2013-08-01 13:29:32 +00:00
|
|
|
|
for (NameSet::const_iterator it = actions[i].array_joined_columns.begin();
|
2013-07-26 13:46:52 +00:00
|
|
|
|
it != actions[i].array_joined_columns.end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
if (it != actions[i].array_joined_columns.begin())
|
|
|
|
|
ss << ", ";
|
2013-08-01 13:29:32 +00:00
|
|
|
|
ss << *it;
|
2013-07-26 13:46:52 +00:00
|
|
|
|
}
|
2013-08-01 13:29:32 +00:00
|
|
|
|
ss << "}";
|
2013-07-26 13:46:52 +00:00
|
|
|
|
}
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
|
|
|
|
/// TODO JOIN
|
2013-05-30 16:52:21 +00:00
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-30 16:52:21 +00:00
|
|
|
|
ss << ": {";
|
|
|
|
|
NamesAndTypesList output_columns = sample_block.getColumnsList();
|
|
|
|
|
for (NamesAndTypesList::const_iterator it = output_columns.begin(); it != output_columns.end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
if (it != output_columns.begin())
|
|
|
|
|
ss << ", ";
|
2014-07-09 11:45:51 +00:00
|
|
|
|
ss << it->name;
|
2013-05-30 16:52:21 +00:00
|
|
|
|
}
|
|
|
|
|
ss << "}";
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-30 16:52:21 +00:00
|
|
|
|
return ss.str();
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
std::string ExpressionActions::dumpActions() const
|
|
|
|
|
{
|
|
|
|
|
std::stringstream ss;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
ss << "input:\n";
|
|
|
|
|
for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it)
|
2014-07-09 11:45:51 +00:00
|
|
|
|
ss << it->name << " " << it->type->getName() << "\n";
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
ss << "\nactions:\n";
|
|
|
|
|
for (size_t i = 0; i < actions.size(); ++i)
|
|
|
|
|
ss << actions[i].toString() << '\n';
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-27 14:02:55 +00:00
|
|
|
|
ss << "\noutput:\n";
|
|
|
|
|
NamesAndTypesList output_columns = sample_block.getColumnsList();
|
|
|
|
|
for (NamesAndTypesList::const_iterator it = output_columns.begin(); it != output_columns.end(); ++it)
|
2014-07-09 11:45:51 +00:00
|
|
|
|
ss << it->name << " " << it->type->getName() << "\n";
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
return ss.str();
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
void ExpressionActions::optimize()
|
|
|
|
|
{
|
|
|
|
|
optimizeArrayJoin();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ExpressionActions::optimizeArrayJoin()
|
|
|
|
|
{
|
|
|
|
|
const size_t NONE = actions.size();
|
|
|
|
|
size_t first_array_join = NONE;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
/// Столбцы, для вычисления которых нужен arrayJoin.
|
|
|
|
|
/// Действия для их добавления нельзя переместить левее arrayJoin.
|
|
|
|
|
NameSet array_joined_columns;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
/// Столбцы, нужные для вычисления arrayJoin или тех, кто от него зависит.
|
|
|
|
|
/// Действия для их удаления нельзя переместить левее arrayJoin.
|
|
|
|
|
NameSet array_join_dependencies;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
for (size_t i = 0; i < actions.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Не будем перемещать действия правее проецирования (тем более, что их там обычно нет).
|
2014-06-12 18:41:09 +00:00
|
|
|
|
if (actions[i].type == ExpressionAction::PROJECT)
|
2013-06-10 16:03:23 +00:00
|
|
|
|
break;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
bool depends_on_array_join = false;
|
|
|
|
|
Names needed;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
|
|
|
|
if (actions[i].type == ExpressionAction::ARRAY_JOIN)
|
2013-06-10 16:03:23 +00:00
|
|
|
|
{
|
|
|
|
|
depends_on_array_join = true;
|
|
|
|
|
needed = actions[i].getNeededColumns();
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (first_array_join == NONE)
|
|
|
|
|
continue;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
needed = actions[i].getNeededColumns();
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
for (size_t j = 0; j < needed.size(); ++j)
|
|
|
|
|
{
|
|
|
|
|
if (array_joined_columns.count(needed[j]))
|
|
|
|
|
{
|
|
|
|
|
depends_on_array_join = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
if (depends_on_array_join)
|
|
|
|
|
{
|
|
|
|
|
if (first_array_join == NONE)
|
|
|
|
|
first_array_join = i;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-07-26 13:46:52 +00:00
|
|
|
|
if (actions[i].result_name != "")
|
|
|
|
|
array_joined_columns.insert(actions[i].result_name);
|
2013-08-01 15:28:10 +00:00
|
|
|
|
array_joined_columns.insert(actions[i].array_joined_columns.begin(), actions[i].array_joined_columns.end());
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
array_join_dependencies.insert(needed.begin(), needed.end());
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
bool can_move = false;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
|
|
|
|
if (actions[i].type == ExpressionAction::REMOVE_COLUMN)
|
2013-06-10 16:03:23 +00:00
|
|
|
|
{
|
|
|
|
|
/// Если удаляем столбец, не нужный для arrayJoin (и тех, кто от него зависит), можно его удалить до arrayJoin.
|
|
|
|
|
can_move = !array_join_dependencies.count(actions[i].source_name);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Если действие не удаляет столбцы и не зависит от результата arrayJoin, можно сделать его до arrayJoin.
|
|
|
|
|
can_move = true;
|
|
|
|
|
}
|
2014-06-12 18:41:09 +00:00
|
|
|
|
|
2013-06-10 16:03:23 +00:00
|
|
|
|
/// Переместим текущее действие в позицию сразу перед первым arrayJoin.
|
|
|
|
|
if (can_move)
|
|
|
|
|
{
|
|
|
|
|
/// Переместим i-й элемент в позицию first_array_join.
|
|
|
|
|
std::rotate(actions.begin() + first_array_join, actions.begin() + i, actions.begin() + i + 1);
|
|
|
|
|
++first_array_join;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-30 19:19:29 +00:00
|
|
|
|
|
|
|
|
|
void ExpressionActionsChain::addStep()
|
|
|
|
|
{
|
|
|
|
|
if (steps.empty())
|
|
|
|
|
throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
|
|
ColumnsWithNameAndType columns = steps.back().actions->getSampleBlock().getColumns();
|
|
|
|
|
steps.push_back(Step(new ExpressionActions(columns, settings)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ExpressionActionsChain::finalize()
|
|
|
|
|
{
|
|
|
|
|
/// Финализируем все шаги. Справа налево, чтобы определять ненужные входные столбцы.
|
|
|
|
|
for (int i = static_cast<int>(steps.size()) - 1; i >= 0; --i)
|
|
|
|
|
{
|
|
|
|
|
Names required_output = steps[i].required_output;
|
|
|
|
|
if (i + 1 < static_cast<int>(steps.size()))
|
|
|
|
|
{
|
|
|
|
|
for (const auto & it : steps[i + 1].actions->getRequiredColumnsWithTypes())
|
2014-07-09 11:45:51 +00:00
|
|
|
|
required_output.push_back(it.name);
|
2014-04-30 19:19:29 +00:00
|
|
|
|
}
|
|
|
|
|
steps[i].actions->finalize(required_output);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Когда возможно, перенесем ARRAY JOIN из более ранних шагов в более поздние.
|
|
|
|
|
for (size_t i = 1; i < steps.size(); ++i)
|
|
|
|
|
{
|
2014-06-12 18:41:09 +00:00
|
|
|
|
ExpressionAction action;
|
2014-04-30 19:19:29 +00:00
|
|
|
|
if (steps[i - 1].actions->popUnusedArrayJoin(steps[i - 1].required_output, action))
|
|
|
|
|
steps[i].actions->prependArrayJoin(action, steps[i - 1].actions->getSampleBlock());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавим выбрасывание ненужных столбцов в начало каждого шага.
|
|
|
|
|
for (size_t i = 1; i < steps.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
size_t columns_from_previous = steps[i - 1].actions->getSampleBlock().columns();
|
|
|
|
|
|
|
|
|
|
/// Если на выходе предыдущего шага образуются ненужные столбцы, добавим в начало этого шага их выбрасывание.
|
|
|
|
|
/// За исключением случая, когда мы выбросим все столбцы и потеряем количество строк в блоке.
|
|
|
|
|
if (!steps[i].actions->getRequiredColumnsWithTypes().empty()
|
|
|
|
|
&& columns_from_previous > steps[i].actions->getRequiredColumnsWithTypes().size())
|
|
|
|
|
steps[i].actions->prependProjectInput();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string ExpressionActionsChain::dumpChain()
|
|
|
|
|
{
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < steps.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
ss << "step " << i << "\n";
|
|
|
|
|
ss << "required output:\n";
|
|
|
|
|
for (const std::string & name : steps[i].required_output)
|
|
|
|
|
ss << name << "\n";
|
|
|
|
|
ss << "\n" << steps[i].actions->dumpActions() << "\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ss.str();
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
|
}
|