Added support for multiple expressions in ARRAY JOIN section. There are known issues. Sorry for automatic indentation changes. [#CONV-8821]

This commit is contained in:
Michael Kolupaev 2013-10-17 13:32:32 +00:00
parent 566a11698e
commit 1cab77c753
9 changed files with 378 additions and 361 deletions

View File

@ -211,6 +211,8 @@ namespace ErrorCodes
NO_FREE_CONNECTION,
CANNOT_FSYNC,
NESTED_TYPE_TOO_DEEP,
ALIAS_REQUIRED,
AMBIGUOUS_IDENTIFIER,
POCO_EXCEPTION = 1000,
STD_EXCEPTION,

View File

@ -36,9 +36,8 @@ public:
ADD_COLUMN,
REMOVE_COLUMN,
COPY_COLUMN,
ARRAY_JOIN, /// Заменяет столбец с массивом на столбец с элементами. Если этот массив упоминается где-то еще, будет ошибка.
ARRAY_JOIN, /// Заменяет указанные столбцы с массивами на столбцы с элементами. Размножает значения в остальных столбцах по количеству элементов в массивах. Массивы должны быть параллельными (иметь одинаковые длины).
PROJECT, /// Переупорядочить и переименовать столбцы, удалить лишние. Допускаются одинаковые имена столбцов в результате.
MULTIPLE_ARRAY_JOIN, /// Заменяет столбцы из вложенной таблицы (или один столбец-массив) на столбцы с элементами.
};
Type type;
@ -47,10 +46,8 @@ public:
std::string result_name;
DataTypePtr result_type;
/// Для MULTIPLE_ARRAY_JOIN
std::string nested_table_name;
std::string nested_table_alias;
NameSet array_joined_columns; /// Имена столбцов без префикса 'NestedTableName.'
/// Для ARRAY_JOIN
NameSet array_joined_columns;
/// Для ADD_COLUMN.
ColumnPtr added_column;
@ -111,23 +108,12 @@ public:
return a;
}
static Action arrayJoin(const std::string & source_name, const std::string & result_name)
static Action arrayJoin(const NameSet & array_joined_columns)
{
if (array_joined_columns.empty())
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
Action a;
a.type = ARRAY_JOIN;
a.source_name = source_name;
a.result_name = result_name;
return a;
}
static Action multipleArrayJoin(const std::string & nested_table_name,
const std::string & nested_table_alias,
const NameSet & array_joined_columns)
{
Action a;
a.type = MULTIPLE_ARRAY_JOIN;
a.nested_table_name = nested_table_name;
a.nested_table_alias = nested_table_alias;
a.array_joined_columns = array_joined_columns;
return a;
}
@ -141,9 +127,6 @@ public:
private:
friend class ExpressionActions;
/// Проверяет, является ли данный столбец результатом ARRAY JOIN
bool isArrayJoinedColumnName(const String & name) const;
std::vector<Action> getPrerequisites(Block & sample_block);
void prepare(Block & sample_block);
void execute(Block & block) const;

View File

@ -94,9 +94,11 @@ private:
/// Исходные столбцы.
NamesAndTypesList columns;
/// Столбцы после агрегации. Если нет агрегации, совпадает с columns.
/// Столбцы после ARRAY JOIN. Если нет ARRAY JOIN, совпадает с columns.
NamesAndTypesList columns_after_array_join;
/// Столбцы после агрегации. Если нет агрегации, совпадает с columns_after_array_join.
NamesAndTypesList aggregated_columns;
/// Таблица, из которой делается запрос. Используется для sign-rewrite'а
const StoragePtr storage;
/// Имя поля Sign в таблице. Непусто, если нужно осуществлять sign-rewrite
@ -113,7 +115,7 @@ private:
typedef std::map<ASTPtr, ASTPtr> MapOfASTs;
/// Столбцы, которые должны быть преобразованы из-за секции ARRAY JOIN
NameSet array_joined_columns;
NameSet columns_for_array_join;
/** Для getActionsImpl.
* Стек из ExpressionActions, соответствующих вложенным лямбда-выражениям.
@ -223,6 +225,7 @@ private:
/// Проверяет, является ли данный столбец результатом ARRAY JOIN
bool isArrayJoinedColumnName(const String & name);
/// Возвращает исходное имя столбца до применения к нему ARRAY JOIN
String getOriginalNestedName(const String & name);
@ -230,7 +233,7 @@ private:
/** Создать словарь алиасов.
*/
void createAliasesDict(ASTPtr & ast);
void createAliasesDict(ASTPtr & ast, int ignore_levels = 0);
/** Для узлов-звёздочек - раскрыть их в список всех столбцов.
* Для узлов-литералов - подставить алиасы.
@ -255,7 +258,7 @@ private:
/// Добавить агрегатные функции в aggregate_descriptions.
/// Установить has_aggregation=true, если есть хоть одна агрегатная функция.
void getAggregatesImpl(ASTPtr ast, ExpressionActions & actions);
void getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names);
/// Получить таблицу, из которой идет запрос

View File

@ -18,7 +18,6 @@ public:
Database,
Table,
Format,
ArrayJoin,
};
/// имя

View File

@ -17,7 +17,7 @@ public:
ASTPtr select_expression_list;
ASTPtr database;
ASTPtr table; /// Идентификатор или подзапрос (рекурсивно ASTSelectQuery)
ASTPtr array_join_identifier;
ASTPtr array_join_expression_list;
bool final;
ASTPtr sample_size;
ASTPtr where_expression;
@ -42,7 +42,7 @@ public:
if (select_expression_list) { res->select_expression_list = select_expression_list->clone(); res->children.push_back(res->select_expression_list); }
if (database) { res->database = database->clone(); res->children.push_back(res->database); }
if (table) { res->table = table->clone(); res->children.push_back(res->table); }
if (array_join_identifier) { res->array_join_identifier = array_join_identifier->clone(); res->children.push_back(res->array_join_identifier); }
if (array_join_expression_list) { res->array_join_expression_list = array_join_expression_list->clone(); res->children.push_back(res->array_join_expression_list); }
if (sample_size) { res->sample_size = sample_size->clone(); res->children.push_back(res->sample_size); }
if (where_expression) { res->where_expression = where_expression->clone(); res->children.push_back(res->where_expression); }
if (group_expression_list) { res->group_expression_list = group_expression_list->clone(); res->children.push_back(res->group_expression_list); }

View File

@ -26,11 +26,6 @@ Names ExpressionActions::Action::getNeededColumns() const
return res;
}
bool ExpressionActions::Action::isArrayJoinedColumnName(const String & name) const
{
return array_joined_columns.count(name) != 0;
}
ExpressionActions::Action ExpressionActions::Action::applyFunction(FunctionPtr function_,
const std::vector<std::string> & argument_names_,
std::string result_name_)
@ -138,48 +133,14 @@ void ExpressionActions::Action::prepare(Block & sample_block)
}
else if (type == ARRAY_JOIN)
{
if (sample_block.has(result_name))
throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
if (!sample_block.has(source_name))
throw Exception("Unknown identifier: '" + source_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
const DataTypeArray * array_type = dynamic_cast<const DataTypeArray *>(&*sample_block.getByName(source_name).type);
if (!array_type)
throw Exception("arrayJoin requires array argument", ErrorCodes::TYPE_MISMATCH);
result_type = array_type->getNestedType();
sample_block.erase(source_name);
sample_block.insert(ColumnWithNameAndType(NULL, result_type, result_name));
}
else if (type == MULTIPLE_ARRAY_JOIN)
{
bool has_arrays_to_join = false;
size_t columns = sample_block.columns();
for (size_t i = 0; i < columns; ++i)
for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
{
const ColumnWithNameAndType & current = sample_block.getByPosition(i);
ColumnWithNameAndType & current = sample_block.getByName(*it);
const DataTypeArray * array_type = dynamic_cast<const DataTypeArray *>(&*current.type);
if (isArrayJoinedColumnName(current.name))
{
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
has_arrays_to_join = true;
ColumnWithNameAndType result;
result.column = NULL;
result.type = array_type->getNestedType();
result.name = current.name;
sample_block.erase(i);
sample_block.insert(i, result);
}
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
current.type = array_type->getNestedType();
}
if (!has_arrays_to_join)
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
}
else if (type == ADD_COLUMN)
{
@ -199,11 +160,11 @@ void ExpressionActions::Action::prepare(Block & sample_block)
void ExpressionActions::Action::execute(Block & block) const
{
if (type == REMOVE_COLUMN || type == COPY_COLUMN || type == ARRAY_JOIN)
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
if (!block.has(source_name))
throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION || type == ARRAY_JOIN)
if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION)
if (block.has(result_name))
throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
@ -238,60 +199,41 @@ void ExpressionActions::Action::execute(Block & block) const
}
case ARRAY_JOIN:
case MULTIPLE_ARRAY_JOIN:
{
ColumnPtr any_array_ptr = NULL;
size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
{
const ColumnWithNameAndType & current = block.getByPosition(i);
if (current.name == source_name || isArrayJoinedColumnName(current.name))
{
if (!dynamic_cast<const DataTypeArray *>(&*current.type))
throw Exception("arrayJoin of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
ColumnPtr array_ptr = current.column;
if (array_ptr->isConst())
array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();
if (any_array_ptr.isNull())
any_array_ptr = array_ptr;
else
{
const ColumnArray & array = dynamic_cast<const ColumnArray &>(*array_ptr);
if (!array.hasEqualOffsets(dynamic_cast<const ColumnArray &>(*any_array_ptr)))
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
}
}
}
if (any_array_ptr.isNull())
if (array_joined_columns.empty())
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column;
if (any_array_ptr->isConst())
any_array_ptr = dynamic_cast<const IColumnConst &>(*any_array_ptr).convertToFullColumn();
const ColumnArray * any_array = dynamic_cast<const ColumnArray *>(&*any_array_ptr);
if (!any_array)
throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH);
size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
{
ColumnWithNameAndType & current = block.getByPosition(i);
if (current.name == source_name || isArrayJoinedColumnName(current.name))
if (array_joined_columns.count(current.name))
{
if (!dynamic_cast<const DataTypeArray *>(&*current.type))
throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
ColumnPtr array_ptr = current.column;
if (array_ptr->isConst())
array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();
ColumnWithNameAndType result;
result.column = dynamic_cast<const ColumnArray &>(*array_ptr).getDataPtr();
result.type = dynamic_cast<const DataTypeArray &>(*current.type).getNestedType();
result.name = type == MULTIPLE_ARRAY_JOIN ? current.name : result_name;
block.erase(i);
block.insert(i, result);
const ColumnArray & array = dynamic_cast<const ColumnArray &>(*array_ptr);
if (!array.hasEqualOffsets(dynamic_cast<const ColumnArray &>(*any_array_ptr)))
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
current.column = dynamic_cast<const ColumnArray &>(*array_ptr).getDataPtr();
current.type = dynamic_cast<const DataTypeArray &>(*current.type).getNestedType();
}
else
{
current.column = current.column->replicate(any_array->getOffsets());
}
}
break;
@ -358,18 +300,13 @@ std::string ExpressionActions::Action::toString() const
ss << " )";
break;
case ARRAY_JOIN:
ss << result_name << "(" << result_type->getName() << ")" << "= " << "arrayJoin" << " ( " << source_name << " )";
break;
case MULTIPLE_ARRAY_JOIN:
ss << "ARRAY JOIN " << nested_table_name << (nested_table_name != nested_table_alias ? " AS " + nested_table_alias : "");
ss << "{";
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
ss << "ARRAY JOIN ";
for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
{
if (it != array_joined_columns.begin())
ss << ", ";
ss << *it;
}
ss << "}";
break;
case PROJECT:
ss << "{";
@ -532,15 +469,31 @@ void ExpressionActions::finalize(const Names & output_columns)
{
Action & action = actions[i];
Names in = action.getNeededColumns();
std::string out = action.result_name;
if (action.type == Action::PROJECT)
{
needed_columns = NameSet(in.begin(), in.end());
unmodified_columns.clear();
}
else if (action.type == Action::ARRAY_JOIN)
{
/// Не будем ARRAY JOIN-ить столбцы, которые дальше не используются.
/// Обычно такие столбцы не используются и до ARRAY JOIN, и поэтому выбрасываются дальше в этой функции.
/// Не будем убирать все столбцы, чтобы не потерять количество строк.
NameSet::iterator it = action.array_joined_columns.begin();
while (it != action.array_joined_columns.end() && action.array_joined_columns.size() > 1)
{
NameSet::iterator jt = it;
++it;
if (!needed_columns.count(*jt))
{
action.array_joined_columns.erase(jt);
}
}
}
else
{
std::string out = action.result_name;
if (!out.empty())
{
/// Если результат не используется и нет побочных эффектов, выбросим действие.
@ -605,9 +558,9 @@ std::string ExpressionActions::getID() const
{
if (i)
ss << ", ";
if (actions[i].type == Action::APPLY_FUNCTION || actions[i].type == Action::ARRAY_JOIN)
if (actions[i].type == Action::APPLY_FUNCTION)
ss << actions[i].result_name;
if (actions[i].type == Action::MULTIPLE_ARRAY_JOIN)
if (actions[i].type == Action::ARRAY_JOIN)
{
ss << "{";
for (NameSet::const_iterator it = actions[i].array_joined_columns.begin();
@ -681,7 +634,7 @@ void ExpressionActions::optimizeArrayJoin()
bool depends_on_array_join = false;
Names needed;
if (actions[i].type == Action::ARRAY_JOIN || actions[i].type == Action::MULTIPLE_ARRAY_JOIN)
if (actions[i].type == Action::ARRAY_JOIN)
{
depends_on_array_join = true;
needed = actions[i].getNeededColumns();

File diff suppressed because it is too large Load Diff

View File

@ -106,25 +106,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
return false;
}
/// ARRAY JOIN array|Nested_table [AS alias]
/// ARRAY JOIN expr list
if (s_array.ignore(pos, end, expected))
{
ws.ignore(pos, end);
if (!s_join.ignore(pos, end, expected))
return false;
ws.ignore(pos, end);
ParserWithOptionalAlias ident(new ParserCompoundIdentifier());
if (!ident.parse(pos, end, select_query->array_join_identifier, expected))
if (!exp_list.parse(pos, end, select_query->array_join_expression_list, expected))
return false;
dynamic_cast<ASTIdentifier &>(*select_query->array_join_identifier).kind = ASTIdentifier::ArrayJoin;
ws.ignore(pos, end);
}
/// FINAL
if (s_final.ignore(pos, end, expected))
{
@ -248,8 +245,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
select_query->children.push_back(select_query->database);
if (select_query->table)
select_query->children.push_back(select_query->table);
if (select_query->array_join_identifier)
select_query->children.push_back(select_query->array_join_identifier);
if (select_query->array_join_expression_list)
select_query->children.push_back(select_query->array_join_expression_list);
if (select_query->sample_size)
select_query->children.push_back(select_query->sample_size);
if (select_query->where_expression)

View File

@ -112,10 +112,10 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo
formatAST(*ast.table, s, indent, hilite, one_line);
}
if (ast.array_join_identifier)
if (ast.array_join_expression_list)
{
s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : "");
formatAST(*ast.array_join_identifier, s, indent, hilite, one_line);
formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line);
}
if (ast.final)