From 1cab77c753ac2b2f3345e92a7292d30c34676c4f Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 17 Oct 2013 13:32:32 +0000 Subject: [PATCH] Added support for multiple expressions in ARRAY JOIN section. There are known issues. Sorry for automatic indentation changes. [#CONV-8821] --- dbms/include/DB/Core/ErrorCodes.h | 2 + .../DB/Interpreters/ExpressionActions.h | 29 +- .../DB/Interpreters/ExpressionAnalyzer.h | 13 +- dbms/include/DB/Parsers/ASTIdentifier.h | 1 - dbms/include/DB/Parsers/ASTSelectQuery.h | 4 +- dbms/src/Interpreters/ExpressionActions.cpp | 147 ++--- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 518 ++++++++++-------- dbms/src/Parsers/ParserSelectQuery.cpp | 21 +- dbms/src/Parsers/formatAST.cpp | 4 +- 9 files changed, 378 insertions(+), 361 deletions(-) diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 2208cb503e2..14c496d40a2 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -211,6 +211,8 @@ namespace ErrorCodes NO_FREE_CONNECTION, CANNOT_FSYNC, NESTED_TYPE_TOO_DEEP, + ALIAS_REQUIRED, + AMBIGUOUS_IDENTIFIER, POCO_EXCEPTION = 1000, STD_EXCEPTION, diff --git a/dbms/include/DB/Interpreters/ExpressionActions.h b/dbms/include/DB/Interpreters/ExpressionActions.h index 98a653eb4c7..86915500925 100644 --- a/dbms/include/DB/Interpreters/ExpressionActions.h +++ b/dbms/include/DB/Interpreters/ExpressionActions.h @@ -36,9 +36,8 @@ public: ADD_COLUMN, REMOVE_COLUMN, COPY_COLUMN, - ARRAY_JOIN, /// Заменяет столбец с массивом на столбец с элементами. Если этот массив упоминается где-то еще, будет ошибка. + ARRAY_JOIN, /// Заменяет указанные столбцы с массивами на столбцы с элементами. Размножает значения в остальных столбцах по количеству элементов в массивах. Массивы должны быть параллельными (иметь одинаковые длины). PROJECT, /// Переупорядочить и переименовать столбцы, удалить лишние. Допускаются одинаковые имена столбцов в результате. - MULTIPLE_ARRAY_JOIN, /// Заменяет столбцы из вложенной таблицы (или один столбец-массив) на столбцы с элементами. }; Type type; @@ -47,10 +46,8 @@ public: std::string result_name; DataTypePtr result_type; - /// Для MULTIPLE_ARRAY_JOIN - std::string nested_table_name; - std::string nested_table_alias; - NameSet array_joined_columns; /// Имена столбцов без префикса 'NestedTableName.' + /// Для ARRAY_JOIN + NameSet array_joined_columns; /// Для ADD_COLUMN. ColumnPtr added_column; @@ -111,23 +108,12 @@ public: return a; } - static Action arrayJoin(const std::string & source_name, const std::string & result_name) + static Action arrayJoin(const NameSet & array_joined_columns) { + if (array_joined_columns.empty()) + throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); Action a; a.type = ARRAY_JOIN; - a.source_name = source_name; - a.result_name = result_name; - return a; - } - - static Action multipleArrayJoin(const std::string & nested_table_name, - const std::string & nested_table_alias, - const NameSet & array_joined_columns) - { - Action a; - a.type = MULTIPLE_ARRAY_JOIN; - a.nested_table_name = nested_table_name; - a.nested_table_alias = nested_table_alias; a.array_joined_columns = array_joined_columns; return a; } @@ -141,9 +127,6 @@ public: private: friend class ExpressionActions; - /// Проверяет является ли данный столбец результатом ARRAY JOIN - bool isArrayJoinedColumnName(const String & name) const; - std::vector getPrerequisites(Block & sample_block); void prepare(Block & sample_block); void execute(Block & block) const; diff --git a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h index e719a693872..7f2b9a1562f 100644 --- a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h +++ b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h @@ -94,9 +94,11 @@ private: /// Исходные столбцы. NamesAndTypesList columns; - /// Столбцы после агрегации. Если нет агрегации, совпадает с columns. + /// Столбцы после ARRAY JOIN. Если нет ARRAY JOIN, совпадает с columns. + NamesAndTypesList columns_after_array_join; + /// Столбцы после агрегации. Если нет агрегации, совпадает с columns_after_array_join. NamesAndTypesList aggregated_columns; - + /// Таблица, из которой делается запрос. Используется для sign-rewrite'а const StoragePtr storage; /// Имя поля Sign в таблице. Непусто, если нужно осуществлять sign-rewrite @@ -113,7 +115,7 @@ private: typedef std::map MapOfASTs; /// Столбцы, которые должны быть преобразованы из-за секции ARRAY JOIN - NameSet array_joined_columns; + NameSet columns_for_array_join; /** Для getActionsImpl. * Стек из ExpressionActions, соответствующих вложенным лямбда-выражениям. @@ -223,6 +225,7 @@ private: /// Проверяет является ли данный столбец результатом ARRAY JOIN bool isArrayJoinedColumnName(const String & name); + /// Возвращает исходное имя столбца до применения к нему ARRAY JOIN String getOriginalNestedName(const String & name); @@ -230,7 +233,7 @@ private: /** Создать словарь алиасов. */ - void createAliasesDict(ASTPtr & ast); + void createAliasesDict(ASTPtr & ast, int ignore_levels = 0); /** Для узлов-звёздочек - раскрыть их в список всех столбцов. * Для узлов-литералов - подставить алиасы. @@ -255,7 +258,7 @@ private: /// Добавить агрегатные функции в aggregate_descriptions. /// Установить has_aggregation=true, если есть хоть одна агрегатная функция. void getAggregatesImpl(ASTPtr ast, ExpressionActions & actions); - + void getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names); /// Получить таблицу, из которой идет запрос diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 5d9e58bcc2b..e212cf7158e 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -18,7 +18,6 @@ public: Database, Table, Format, - ArrayJoin, }; /// имя diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index 98dfbdb7102..9feee748f52 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -17,7 +17,7 @@ public: ASTPtr select_expression_list; ASTPtr database; ASTPtr table; /// Идентификатор или подзапрос (рекурсивно ASTSelectQuery) - ASTPtr array_join_identifier; + ASTPtr array_join_expression_list; bool final; ASTPtr sample_size; ASTPtr where_expression; @@ -42,7 +42,7 @@ public: if (select_expression_list) { res->select_expression_list = select_expression_list->clone(); res->children.push_back(res->select_expression_list); } if (database) { res->database = database->clone(); res->children.push_back(res->database); } if (table) { res->table = table->clone(); res->children.push_back(res->table); } - if (array_join_identifier) { res->array_join_identifier = array_join_identifier->clone(); res->children.push_back(res->array_join_identifier); } + if (array_join_expression_list) { res->array_join_expression_list = array_join_expression_list->clone(); res->children.push_back(res->array_join_expression_list); } if (sample_size) { res->sample_size = sample_size->clone(); res->children.push_back(res->sample_size); } if (where_expression) { res->where_expression = where_expression->clone(); res->children.push_back(res->where_expression); } if (group_expression_list) { res->group_expression_list = group_expression_list->clone(); res->children.push_back(res->group_expression_list); } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 49ffec26b4a..81bf52f77ed 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -26,11 +26,6 @@ Names ExpressionActions::Action::getNeededColumns() const return res; } -bool ExpressionActions::Action::isArrayJoinedColumnName(const String & name) const -{ - return array_joined_columns.count(name) != 0; -} - ExpressionActions::Action ExpressionActions::Action::applyFunction(FunctionPtr function_, const std::vector & argument_names_, std::string result_name_) @@ -138,48 +133,14 @@ void ExpressionActions::Action::prepare(Block & sample_block) } else if (type == ARRAY_JOIN) { - if (sample_block.has(result_name)) - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - if (!sample_block.has(source_name)) - throw Exception("Unknown identifier: '" + source_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - - const DataTypeArray * array_type = dynamic_cast(&*sample_block.getByName(source_name).type); - if (!array_type) - throw Exception("arrayJoin requires array argument", ErrorCodes::TYPE_MISMATCH); - result_type = array_type->getNestedType(); - - sample_block.erase(source_name); - sample_block.insert(ColumnWithNameAndType(NULL, result_type, result_name)); - } - else if (type == MULTIPLE_ARRAY_JOIN) - { - bool has_arrays_to_join = false; - - size_t columns = sample_block.columns(); - for (size_t i = 0; i < columns; ++i) + for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) { - const ColumnWithNameAndType & current = sample_block.getByPosition(i); + ColumnWithNameAndType & current = sample_block.getByName(*it); const DataTypeArray * array_type = dynamic_cast(&*current.type); - - if (isArrayJoinedColumnName(current.name)) - { - if (!array_type) - throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); - - has_arrays_to_join = true; - - ColumnWithNameAndType result; - result.column = NULL; - result.type = array_type->getNestedType(); - result.name = current.name; - - sample_block.erase(i); - sample_block.insert(i, result); - } + if (!array_type) + throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); + current.type = array_type->getNestedType(); } - - if (!has_arrays_to_join) - throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); } else if (type == ADD_COLUMN) { @@ -199,11 +160,11 @@ void ExpressionActions::Action::prepare(Block & sample_block) void ExpressionActions::Action::execute(Block & block) const { - if (type == REMOVE_COLUMN || type == COPY_COLUMN || type == ARRAY_JOIN) + if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION || type == ARRAY_JOIN) + if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION) if (block.has(result_name)) throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); @@ -238,60 +199,41 @@ void ExpressionActions::Action::execute(Block & block) const } case ARRAY_JOIN: - case MULTIPLE_ARRAY_JOIN: { - ColumnPtr any_array_ptr = NULL; - - size_t columns = block.columns(); - for (size_t i = 0; i < columns; ++i) - { - const ColumnWithNameAndType & current = block.getByPosition(i); - - if (current.name == source_name || isArrayJoinedColumnName(current.name)) - { - if (!dynamic_cast(&*current.type)) - throw Exception("arrayJoin of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); - - ColumnPtr array_ptr = current.column; - if (array_ptr->isConst()) - array_ptr = dynamic_cast(*array_ptr).convertToFullColumn(); - - if (any_array_ptr.isNull()) - any_array_ptr = array_ptr; - else - { - const ColumnArray & array = dynamic_cast(*array_ptr); - if (!array.hasEqualOffsets(dynamic_cast(*any_array_ptr))) - throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - } - } - } - - if (any_array_ptr.isNull()) + if (array_joined_columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); - + ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column; + if (any_array_ptr->isConst()) + any_array_ptr = dynamic_cast(*any_array_ptr).convertToFullColumn(); const ColumnArray * any_array = dynamic_cast(&*any_array_ptr); - + if (!any_array) + throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); + + size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) { ColumnWithNameAndType & current = block.getByPosition(i); - if (current.name == source_name || isArrayJoinedColumnName(current.name)) + if (array_joined_columns.count(current.name)) { + if (!dynamic_cast(&*current.type)) + throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); + ColumnPtr array_ptr = current.column; if (array_ptr->isConst()) array_ptr = dynamic_cast(*array_ptr).convertToFullColumn(); - ColumnWithNameAndType result; - result.column = dynamic_cast(*array_ptr).getDataPtr(); - result.type = dynamic_cast(*current.type).getNestedType(); - result.name = type == MULTIPLE_ARRAY_JOIN ? current.name : result_name; - - block.erase(i); - block.insert(i, result); + const ColumnArray & array = dynamic_cast(*array_ptr); + if (!array.hasEqualOffsets(dynamic_cast(*any_array_ptr))) + throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + + current.column = dynamic_cast(*array_ptr).getDataPtr(); + current.type = dynamic_cast(*current.type).getNestedType(); } else + { current.column = current.column->replicate(any_array->getOffsets()); + } } break; @@ -358,18 +300,13 @@ std::string ExpressionActions::Action::toString() const ss << " )"; break; case ARRAY_JOIN: - ss << result_name << "(" << result_type->getName() << ")" << "= " << "arrayJoin" << " ( " << source_name << " )"; - break; - case MULTIPLE_ARRAY_JOIN: - ss << "ARRAY JOIN " << nested_table_name << (nested_table_name != nested_table_alias ? " AS " + nested_table_alias : ""); - ss << "{"; - for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) + ss << "ARRAY JOIN "; + for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) { if (it != array_joined_columns.begin()) ss << ", "; ss << *it; } - ss << "}"; break; case PROJECT: ss << "{"; @@ -532,15 +469,31 @@ void ExpressionActions::finalize(const Names & output_columns) { Action & action = actions[i]; Names in = action.getNeededColumns(); - std::string out = action.result_name; - + if (action.type == Action::PROJECT) { needed_columns = NameSet(in.begin(), in.end()); unmodified_columns.clear(); } + else if (action.type == Action::ARRAY_JOIN) + { + /// Не будем ARRAY JOIN-ить столбцы, которые дальше не используются. + /// Обычно такие столбцы не используются и до ARRAY JOIN, и поэтому выбрасываются дальше в этой функции. + /// Не будем убирать все столбцы, чтобы не потерять количество строк. + NameSet::iterator it = action.array_joined_columns.begin(); + while (it != action.array_joined_columns.end() && action.array_joined_columns.size() > 1) + { + NameSet::iterator jt = it; + ++it; + if (!needed_columns.count(*jt)) + { + action.array_joined_columns.erase(jt); + } + } + } else { + std::string out = action.result_name; if (!out.empty()) { /// Если результат не используется и нет побочных эффектов, выбросим действие. @@ -605,9 +558,9 @@ std::string ExpressionActions::getID() const { if (i) ss << ", "; - if (actions[i].type == Action::APPLY_FUNCTION || actions[i].type == Action::ARRAY_JOIN) + if (actions[i].type == Action::APPLY_FUNCTION) ss << actions[i].result_name; - if (actions[i].type == Action::MULTIPLE_ARRAY_JOIN) + if (actions[i].type == Action::ARRAY_JOIN) { ss << "{"; for (NameSet::const_iterator it = actions[i].array_joined_columns.begin(); @@ -681,7 +634,7 @@ void ExpressionActions::optimizeArrayJoin() bool depends_on_array_join = false; Names needed; - if (actions[i].type == Action::ARRAY_JOIN || actions[i].type == Action::MULTIPLE_ARRAY_JOIN) + if (actions[i].type == Action::ARRAY_JOIN) { depends_on_array_join = true; needed = actions[i].getNeededColumns(); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 3b9ed6f1ce2..310402f476f 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -54,26 +54,47 @@ void ExpressionAnalyzer::init() { select_query = dynamic_cast(&*ast); has_aggregation = false; - + createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true. normalizeTree(); - + removeUnusedColumns(); - + getArrayJoinedColumns(); - + /// Найдем агрегатные функции. if (select_query && (select_query->group_expression_list || select_query->having_expression)) has_aggregation = true; - + ExpressionActions temp_actions(columns, settings); - addMultipleArrayJoinAction(temp_actions); + + columns_after_array_join = columns; + + if (select_query && select_query->array_join_expression_list) + { + const ASTs & array_join_asts = select_query->array_join_expression_list->children; + for (size_t i = 0; i < array_join_asts.size(); ++i) + { + ASTPtr ast = array_join_asts[i]; + getRootActionsImpl(ast, true, false, temp_actions); + } + + addMultipleArrayJoinAction(temp_actions); + + const Block & temp_sample = temp_actions.getSampleBlock(); + for (size_t i = 0; i < temp_sample.columns(); ++i) + { + const ColumnWithNameAndType & col = temp_sample.getByPosition(i); + if (isArrayJoinedColumnName(col.name)) + columns_after_array_join.push_back(NameAndTypePair(col.name, col.type)); + } + } getAggregatesImpl(ast, temp_actions); - + if (has_aggregation) { assertSelect(); - + /// Найдем ключи агрегации. if (select_query->group_expression_list) { @@ -86,7 +107,7 @@ void ExpressionAnalyzer::init() key.first = group_asts[i]->getColumnName(); key.second = temp_actions.getSampleBlock().getByName(key.first).type; aggregation_keys.push_back(key); - + if (!unique_keys.count(key.first)) { aggregated_columns.push_back(key); @@ -94,7 +115,7 @@ void ExpressionAnalyzer::init() } } } - + for (size_t i = 0; i < aggregate_descriptions.size(); ++i) { AggregateDescription & desc = aggregate_descriptions[i]; @@ -103,7 +124,7 @@ void ExpressionAnalyzer::init() } else { - aggregated_columns = columns; + aggregated_columns = columns_after_array_join; } } @@ -120,46 +141,73 @@ NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name, bool ExpressionAnalyzer::isArrayJoinedColumnName(const String & name) { - if (select_query && select_query->array_join_identifier) + if (select_query && select_query->array_join_expression_list) { - String nested_table_alias = select_query->array_join_identifier->getAlias(); - return name == nested_table_alias || DataTypeNested::extractNestedTableName(name) == nested_table_alias; + ASTs & expressions = select_query->array_join_expression_list->children; + int count = 0; + String table_name = DataTypeNested::extractNestedTableName(name); + for (size_t i = 0; i < expressions.size(); ++i) + { + String alias = expressions[i]->getAlias(); + if (name == alias || table_name == alias) + ++count; + } + if (count > 1) + throw Exception("Ambiguous identifier from ARRAY JOIN: " + name, ErrorCodes::AMBIGUOUS_IDENTIFIER); + return count == 1; } return false; } + String ExpressionAnalyzer::getOriginalNestedName(const String & name) { - if (select_query && select_query->array_join_identifier) + if (select_query && select_query->array_join_expression_list) { - String nested_table_name = select_query->array_join_identifier->getColumnName(); - String nested_table_alias = select_query->array_join_identifier->getAlias(); - - if (name == nested_table_alias) - return nested_table_name; - - if (DataTypeNested::extractNestedTableName(name) == nested_table_alias) + ASTs & expressions = select_query->array_join_expression_list->children; + String table_name = DataTypeNested::extractNestedTableName(name); + for (size_t i = 0; i < expressions.size(); ++i) { - String nested_column = DataTypeNested::extractNestedColumnName(name); - return DataTypeNested::concatenateNestedName(nested_table_name, nested_column); + String expression_name = expressions[i]->getColumnName(); + String alias = expressions[i]->getAlias(); + bool is_identifier = !!dynamic_cast(&*expressions[i]); + if (name == alias) + { + if (is_identifier) + return expression_name; + else + return ""; + } + else if (table_name == alias) + { + String nested_column = DataTypeNested::extractNestedColumnName(name); + return DataTypeNested::concatenateNestedName(expression_name, nested_column); + } } } return name; } -void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast) +void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels) { - if (ASTIdentifier * node = dynamic_cast(&*ast)) - if (node->kind == ASTIdentifier::ArrayJoin) - return; + ASTSelectQuery * select = dynamic_cast(&*ast); /// Обход снизу-вверх. Не опускаемся в подзапросы. for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it) + { + int new_ignore_levels = std::max(0, ignore_levels - 1); + /// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем. + if (select && *it == select->array_join_expression_list) + new_ignore_levels = 2; if (!dynamic_cast(&**it)) - createAliasesDict(*it); - + createAliasesDict(*it, new_ignore_levels); + } + + if (ignore_levels > 0) + return; + std::string * alias = getAlias(ast); if (alias && !alias->empty()) { @@ -181,7 +229,7 @@ StoragePtr ExpressionAnalyzer::getTable() { if (select->table && !dynamic_cast(&*select->table)) { - String database = select->database ? + String database = select->database ? dynamic_cast(*select->database).name : ""; const String & table = dynamic_cast(*select->table).name; @@ -232,7 +280,7 @@ ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node) ASTExpressionList & exp_list = *p_exp_list; ASTPtr exp_list_node = p_exp_list; exp_list.children.push_back(createSignColumn()); - + /// sum(Sign) ASTFunction * p_sum = new ASTFunction; ASTFunction & sum = *p_sum; @@ -241,20 +289,20 @@ ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node) sum.alias = node->alias; sum.arguments = exp_list_node; sum.children.push_back(exp_list_node); - + return sum_node; } ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node) -{ +{ /// 'x', 'Sign' ASTExpressionList * p_mult_exp_list = new ASTExpressionList; ASTExpressionList & mult_exp_list = *p_mult_exp_list; ASTPtr mult_exp_list_node = p_mult_exp_list; mult_exp_list.children.push_back(createSignColumn()); mult_exp_list.children.push_back(node->arguments->children[0]); - + /// x * Sign ASTFunction * p_mult = new ASTFunction; ASTFunction & mult = *p_mult; @@ -262,13 +310,13 @@ ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node) mult.name = "multiply"; mult.arguments = mult_exp_list_node; mult.children.push_back(mult_exp_list_node); - + /// 'x * Sign' ASTExpressionList * p_exp_list = new ASTExpressionList; ASTExpressionList & exp_list = *p_exp_list; ASTPtr exp_list_node = p_exp_list; exp_list.children.push_back(mult_node); - + /// sum(x * Sign) ASTFunction * p_sum = new ASTFunction; ASTFunction & sum = *p_sum; @@ -276,8 +324,8 @@ ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node) sum.name = "sum"; sum.alias = node->alias; sum.arguments = exp_list_node; - sum.children.push_back(exp_list_node); - + sum.children.push_back(exp_list_node); + return sum_node; } @@ -288,14 +336,14 @@ ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node) ASTPtr node_clone = node->clone(); ASTFunction * node_clone_func = dynamic_cast(&*node_clone); node_clone_func->alias = ""; - + /// 'sum(Sign * x)', 'sum(Sign)' ASTExpressionList * p_div_exp_list = new ASTExpressionList; ASTExpressionList & div_exp_list = *p_div_exp_list; ASTPtr div_exp_list_node = p_div_exp_list; div_exp_list.children.push_back(rewriteSum(node_clone_func)); div_exp_list.children.push_back(rewriteCount(node_clone_func)); - + /// sum(Sign * x) / sum(Sign) ASTFunction * p_div = new ASTFunction; ASTFunction & div = *p_div; @@ -304,7 +352,7 @@ ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node) div.alias = node->alias; div.arguments = div_exp_list_node; div.children.push_back(div_exp_list_node); - + return div_node; } @@ -348,19 +396,19 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as ast = finished_asts[ast]; return; } - + ASTPtr initial_ast = ast; current_asts.insert(initial_ast); std::string * my_alias = getAlias(ast); if (my_alias && !my_alias->empty()) current_alias = *my_alias; - + /// rewrite правила, которые действуют при обходе сверху-вниз. - + if (!in_sign_rewritten && !sign_column_name.empty()) in_sign_rewritten = considerSignRewrite(ast); - + if (ASTFunction * node = dynamic_cast(&*ast)) { /** Нет ли в таблице столбца, название которого полностью совпадает с записью функции? @@ -377,7 +425,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as } else if (ASTIdentifier * node = dynamic_cast(&*ast)) { - if (node->kind == ASTIdentifier::Column || node->kind == ASTIdentifier::ArrayJoin) + if (node->kind == ASTIdentifier::Column) { /// Если это алиас, но не родительский алиас (чтобы работали конструкции вроде "SELECT column+1 AS column"). Aliases::const_iterator jt = aliases.find(node->name); @@ -412,25 +460,22 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as } } } - + /// Рекурсивные вызовы. Не опускаемся в подзапросы. - + for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it) if (!dynamic_cast(&**it)) normalizeTreeImpl(*it, finished_asts, current_asts, current_alias, in_sign_rewritten); - + /// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression. - /// Тоже самое с секцией ARRAY JOIN, алиас нужно подставить и там. if (ASTSelectQuery * select = dynamic_cast(&*ast)) { if (select->where_expression) normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, in_sign_rewritten); if (select->having_expression) normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, in_sign_rewritten); - if (select->array_join_identifier) - normalizeTreeImpl(select->array_join_identifier, finished_asts, current_asts, current_alias, in_sign_rewritten); } - + /// Действия, выполняемые снизу вверх. if (ASTFunction * node = dynamic_cast(&*ast)) @@ -452,7 +497,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as node->kind = ASTFunction::FUNCTION; } } - + current_asts.erase(initial_ast); current_asts.erase(ast); finished_asts[initial_ast] = ast; @@ -467,10 +512,10 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) */ IAST & args = *node->arguments; ASTPtr & arg = args.children[1]; - + if (dynamic_cast(&*arg)) return; - + if (dynamic_cast(&*arg)) { /// Исполняем подзапрос, превращаем результат в множество, и кладём это множество на место подзапроса. @@ -483,7 +528,7 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) else { /// Случай явного перечисления значений. - + DataTypes set_element_types; ASTPtr & left_arg = args.children[0]; @@ -504,17 +549,17 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) else set_element_types.push_back(left_type); } - + /// Отличим случай x in (1, 2) от случая x in 1 (он же x in (1)). bool single_value = false; ASTPtr elements_ast = arg; - + if (ASTFunction * set_func = dynamic_cast(&*arg)) { if (set_func->name != "tuple") throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - + /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2). ASTFunction * any_element = dynamic_cast(&*set_func->arguments->children[0]); if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) @@ -531,14 +576,14 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - + if (single_value) { ASTPtr exp_list = new ASTExpressionList; exp_list->children.push_back(elements_ast); elements_ast = exp_list; } - + ASTSet * ast_set = new ASTSet(arg->getColumnName()); ast_set->set = new Set(settings.limits); ast_set->set->create(set_element_types, elements_ast); @@ -566,17 +611,26 @@ void ExpressionAnalyzer::getRootActionsImpl(ASTPtr ast, bool no_subqueries, bool void ExpressionAnalyzer::getArrayJoinedColumns() { - if (select_query && select_query->array_join_identifier) - getArrayJoinedColumnsImpl(ast); + if (select_query && select_query->array_join_expression_list) + { + getArrayJoinedColumnsImpl(select_query->group_expression_list); + getArrayJoinedColumnsImpl(select_query->select_expression_list); + getArrayJoinedColumnsImpl(select_query->where_expression); + getArrayJoinedColumnsImpl(select_query->having_expression); + getArrayJoinedColumnsImpl(select_query->order_expression_list); + } } void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast) { + if (!ast) + return; + if (ASTIdentifier * node = dynamic_cast(&*ast)) { if (node->kind == ASTIdentifier::Column && isArrayJoinedColumnName(node->name)) - array_joined_columns.insert(node->name); + columns_for_array_join.insert(node->name); } else { @@ -587,55 +641,18 @@ void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast) } -void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions) -{ - if (array_joined_columns.empty()) - return; - - String nested_table_name = select_query->array_join_identifier->getColumnName(); - String nested_table_alias = select_query->array_join_identifier->getAlias(); - - bool added_columns = false; - - const NamesAndTypesList & input_columns = actions.getRequiredColumnsWithTypes(); - for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) - { - const String & name = it->first; - const DataTypePtr & type= it->second; - - String nested_table = DataTypeNested::extractNestedTableName(name); - String nested_column = DataTypeNested::extractNestedColumnName(name); - - /// Проверка на тип нужна для случая, когда столбец уже был преобразован по ARRAY JOIN - /// на предыдущих шагах ExpressionActionChain'а - if (dynamic_cast(&*type) - && (name == nested_table_name || nested_table == nested_table_name)) - { - added_columns = true; - String array_joined_name = name == nested_table_name - ? nested_table_alias - : DataTypeNested::concatenateNestedName(nested_table_alias, nested_column); - actions.add(ExpressionActions::Action::copyColumn(name, array_joined_name)); - } - } - - if (added_columns) - actions.add(ExpressionActions::Action::multipleArrayJoin(nested_table_name, nested_table_alias, array_joined_columns)); -} - - void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack) { /// Если результат вычисления уже есть в блоке. if ((dynamic_cast(&*ast) || dynamic_cast(&*ast)) && actions_stack.getSampleBlock().has(ast->getColumnName())) return; - + if (ASTFunction * node = dynamic_cast(&*ast)) { if (node->kind == ASTFunction::LAMBDA_EXPRESSION) throw Exception("Unexpected expression", ErrorCodes::UNEXPECTED_EXPRESSION); - + if (node->kind == ASTFunction::ARRAY_JOIN) { if (node->arguments->children.size() != 1) @@ -644,14 +661,16 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl getActionsImpl(arg, no_subqueries, only_consts, actions_stack); if (!only_consts) { - String temp_name = "__array_joined__" + arg->getColumnName(); - actions_stack.addAction(ExpressionActions::Action::copyColumn(arg->getColumnName(), temp_name)); - actions_stack.addAction(ExpressionActions::Action::arrayJoin(temp_name, node->getColumnName())); + String result_name = node->getColumnName(); + actions_stack.addAction(ExpressionActions::Action::copyColumn(arg->getColumnName(), result_name)); + NameSet joined_columns; + joined_columns.insert(result_name); + actions_stack.addAction(ExpressionActions::Action::arrayJoin(joined_columns)); } - + return; } - + if (node->kind == ASTFunction::FUNCTION) { if (node->name == "in" || node->name == "notIn") @@ -676,20 +695,20 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl return; } } - + FunctionPtr function = context.getFunctionFactory().get(node->name, context); - + Names argument_names; DataTypes argument_types; bool arguments_present = true; - + /// Если у функции есть аргумент-лямбда-выражение, нужно определить его тип до рекурсивного вызова. bool has_lambda_arguments = false; - + for (size_t i = 0; i < node->arguments->children.size(); ++i) { ASTPtr child = node->arguments->children[i]; - + ASTFunction * lambda = dynamic_cast(&*child); ASTSet * set = dynamic_cast(&*child); if (lambda && lambda->name == "lambda") @@ -697,12 +716,12 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl /// Если аргумент - лямбда-выражение, только запомним его примерный тип. if (lambda->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - + ASTFunction * lambda_args_tuple = dynamic_cast(&*lambda->arguments->children[0]); - + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - + has_lambda_arguments = true; argument_types.push_back(new DataTypeExpression(DataTypes(lambda_args_tuple->arguments->children.size()))); /// Выберем название в следующем цикле. @@ -716,9 +735,9 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl column.column = new ColumnSet(1, set->set); column.type = new DataTypeSet; column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); - + actions_stack.addAction(ExpressionActions::Action::addColumn(column)); - + argument_types.push_back(column.type); argument_names.push_back(column.name); } @@ -745,21 +764,21 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl } } } - + if (only_consts && !arguments_present) return; - + Names additional_requirements; - + if (has_lambda_arguments && !only_consts) { function->getLambdaArgumentTypes(argument_types); - + /// Вызовемся рекурсивно для лямбда-выражений. for (size_t i = 0; i < node->arguments->children.size(); ++i) { ASTPtr child = node->arguments->children[i]; - + ASTFunction * lambda = dynamic_cast(&*child); if (lambda && lambda->name == "lambda") { @@ -767,39 +786,39 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl ASTFunction * lambda_args_tuple = dynamic_cast(&*lambda->arguments->children[0]); ASTs lambda_arg_asts = lambda_args_tuple->arguments->children; NamesAndTypesList lambda_arguments; - + for (size_t j = 0; j < lambda_arg_asts.size(); ++j) { ASTIdentifier * identifier = dynamic_cast(&*lambda_arg_asts[j]); if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - + String arg_name = identifier->name; NameAndTypePair arg(arg_name, lambda_type->getArgumentTypes()[j]); - + lambda_arguments.push_back(arg); } - + actions_stack.pushLevel(lambda_arguments); getActionsImpl(lambda->arguments->children[1], no_subqueries, only_consts, actions_stack); ExpressionActionsPtr lambda_actions = actions_stack.popLevel(); - + String result_name = lambda->arguments->children[1]->getColumnName(); lambda_actions->finalize(Names(1, result_name)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; argument_types[i] = new DataTypeExpression(lambda_type->getArgumentTypes(), result_type); - + Names captured = lambda_actions->getRequiredColumns(); for (size_t j = 0; j < captured.size(); ++j) { if (findColumn(captured[j], lambda_arguments) == lambda_arguments.end()) additional_requirements.push_back(captured[j]); } - + /// Не можем дать название getColumnName(), /// потому что оно не однозначно определяет выражение (типы аргументов могут быть разными). argument_names[i] = getUniqueName(actions_stack.getSampleBlock(), "__lambda"); - + ColumnWithNameAndType lambda_column; lambda_column.column = new ColumnExpression(1, lambda_actions, lambda_arguments, result_type, result_name); lambda_column.type = argument_types[i]; @@ -808,7 +827,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl } } } - + if (only_consts) { for (size_t i = 0; i < argument_names.size(); ++i) @@ -820,7 +839,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl } } } - + if (arguments_present) actions_stack.addAction(ExpressionActions::Action::applyFunction(function, argument_names, node->getColumnName()), additional_requirements); @@ -833,7 +852,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl column.column = type->createConstColumn(1, node->value); column.type = type; column.name = node->getColumnName(); - + actions_stack.addAction(ExpressionActions::Action::addColumn(column)); } else @@ -852,15 +871,15 @@ void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actio has_aggregation = true; AggregateDescription aggregate; aggregate.column_name = node->getColumnName(); - + for (size_t i = 0; i < aggregate_descriptions.size(); ++i) if (aggregate_descriptions[i].column_name == aggregate.column_name) return; - + ASTs & arguments = node->arguments->children; aggregate.argument_names.resize(arguments.size()); DataTypes types(arguments.size()); - + for (size_t i = 0; i < arguments.size(); ++i) { getRootActionsImpl(arguments[i], true, false, actions); @@ -868,28 +887,28 @@ void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actio types[i] = actions.getSampleBlock().getByName(name).type; aggregate.argument_names[i] = name; } - + aggregate.function = context.getAggregateFunctionFactory().get(node->name, types); - + if (node->parameters) { ASTs & parameters = dynamic_cast(*node->parameters).children; Row params_row(parameters.size()); - + for (size_t i = 0; i < parameters.size(); ++i) { ASTLiteral * lit = dynamic_cast(&*parameters[i]); if (!lit) throw Exception("Parameters to aggregate functions must be literals", ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); - + params_row[i] = lit->value; } - + aggregate.function->setParameters(params_row); } - + aggregate.function->setArguments(types); - + aggregate_descriptions.push_back(aggregate); } else @@ -924,70 +943,113 @@ void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypes } } +void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions) +{ + ASTs & asts = select_query->array_join_expression_list->children; + typedef std::map NameToAlias; + NameToAlias name_to_alias; + NameSet known_aliases; + for (size_t i = 0; i < asts.size(); ++i) + { + ASTPtr ast = asts[i]; + + String nested_table_name = ast->getColumnName(); + String nested_table_alias = ast->getAlias(); + if (nested_table_alias == nested_table_name && !dynamic_cast(&*ast)) + throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED); + + if (known_aliases.count(nested_table_alias) || aliases.count(nested_table_alias)) + throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_ALIASES_FOR_EXPRESSION); + if (name_to_alias.count(nested_table_name)) + throw Exception("Duplicate ARRAY JOIN on column " + nested_table_name, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + known_aliases.insert(nested_table_alias); + name_to_alias[nested_table_name] = nested_table_alias; + } + + NamesAndTypesList input_columns = actions.getSampleBlock().getColumnsList(); + for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) + { + const String & name = it->first; + + String nested_table = DataTypeNested::extractNestedTableName(name); + String nested_column = DataTypeNested::extractNestedColumnName(name); + + String array_joined_name; + if (name_to_alias.count(name)) + array_joined_name = name_to_alias[name]; + else if (name_to_alias.count(nested_table)) + array_joined_name = DataTypeNested::concatenateNestedName(name_to_alias[nested_table], nested_column); + else + continue; + + actions.add(ExpressionActions::Action::copyColumn(name, array_joined_name)); + } + + actions.add(ExpressionActions::Action::arrayJoin(columns_for_array_join)); +} + + bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain) { assertSelect(); - - if (!select_query->array_join_identifier) + + if (!select_query->array_join_expression_list) return false; - + initChain(chain, columns); ExpressionActionsChain::Step & step = chain.steps.back(); - - getArrayJoinedColumnsImpl(ast); - - if (!array_joined_columns.empty()) - { - addMultipleArrayJoinAction(*step.actions); - step.required_output.insert(step.required_output.end(), array_joined_columns.begin(), array_joined_columns.end()); - } - + + getRootActionsImpl(select_query->array_join_expression_list, false, false, *step.actions); + + addMultipleArrayJoinAction(*step.actions); + step.required_output.insert(step.required_output.end(), columns_for_array_join.begin(), columns_for_array_join.end()); + return true; } bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain) { assertSelect(); - + if (!select_query->where_expression) return false; - - initChain(chain, columns); + + initChain(chain, columns_after_array_join); ExpressionActionsChain::Step & step = chain.steps.back(); - + step.required_output.push_back(select_query->where_expression->getColumnName()); getRootActionsImpl(select_query->where_expression, false, false, *step.actions); - + return true; } bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain) { assertAggregation(); - + if (!select_query->group_expression_list) return false; - + initChain(chain, columns); ExpressionActionsChain::Step & step = chain.steps.back(); - + ASTs asts = select_query->group_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { step.required_output.push_back(asts[i]->getColumnName()); getRootActionsImpl(asts[i], false, false, *step.actions); } - + return true; } void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain) { assertAggregation(); - - initChain(chain, columns); + + initChain(chain, columns_after_array_join); ExpressionActionsChain::Step & step = chain.steps.back(); - + for (size_t i = 0; i < aggregate_descriptions.size(); ++i) { for (size_t j = 0; j < aggregate_descriptions[i].argument_names.size(); ++j) @@ -995,12 +1057,12 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai step.required_output.push_back(aggregate_descriptions[i].argument_names[j]); } } - + getActionsBeforeAggregationImpl(select_query->select_expression_list, *step.actions); - + if (select_query->having_expression) getActionsBeforeAggregationImpl(select_query->having_expression, *step.actions); - + if (select_query->order_expression_list) getActionsBeforeAggregationImpl(select_query->order_expression_list, *step.actions); } @@ -1008,28 +1070,28 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain) { assertAggregation(); - + if (!select_query->having_expression) return false; - + initChain(chain, aggregated_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - + step.required_output.push_back(select_query->having_expression->getColumnName()); getRootActionsImpl(select_query->having_expression, false, false, *step.actions); - + return true; } void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain) { assertSelect(); - + initChain(chain, aggregated_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - + getRootActionsImpl(select_query->select_expression_list, false, false, *step.actions); - + ASTs asts = select_query->select_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { @@ -1040,15 +1102,15 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain) bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain) { assertSelect(); - + if (!select_query->order_expression_list) return false; - + initChain(chain, aggregated_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - + getRootActionsImpl(select_query->order_expression_list, false, false, *step.actions); - + ASTs asts = select_query->order_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { @@ -1058,26 +1120,26 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain) ASTPtr order_expression = ast->children[0]; step.required_output.push_back(order_expression->getColumnName()); } - + return true; } void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain) { assertSelect(); - + initChain(chain, aggregated_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - + NamesWithAliases result_columns; - + ASTs asts = select_query->select_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias())); step.required_output.push_back(result_columns.back().second); } - + step.actions->add(ExpressionActions::Action::project(result_columns)); } @@ -1085,20 +1147,19 @@ void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain) Block ExpressionAnalyzer::getSelectSampleBlock() { assertSelect(); - + ExpressionActions temp_actions(aggregated_columns, settings); - addMultipleArrayJoinAction(temp_actions); NamesWithAliases result_columns; - + ASTs asts = select_query->select_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias())); getRootActionsImpl(asts[i], true, false, temp_actions); } - + temp_actions.add(ExpressionActions::Action::project(result_columns)); - + return temp_actions.getSampleBlock(); } @@ -1108,7 +1169,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionA if (node && node->kind == ASTFunction::AGGREGATE_FUNCTION) { ASTs & arguments = node->arguments->children; - + for (size_t i = 0; i < arguments.size(); ++i) { getRootActionsImpl(arguments[i], false, false, actions); @@ -1129,14 +1190,14 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) ExpressionActionsPtr actions = new ExpressionActions(columns, settings); NamesWithAliases result_columns; Names result_names; - + ASTs asts; - + if (ASTExpressionList * node = dynamic_cast(&*ast)) asts = node->children; else asts = ASTs(1, ast); - + for (size_t i = 0; i < asts.size(); ++i) { std::string name = asts[i]->getColumnName(); @@ -1149,7 +1210,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) result_names.push_back(alias); getRootActionsImpl(asts[i], false, false, *actions); } - + if (project_result) { actions->add(ExpressionActions::Action::project(result_columns)); @@ -1160,9 +1221,9 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it) result_names.push_back(it->first); } - + actions->finalize(result_names); - + return actions; } @@ -1170,9 +1231,9 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) ExpressionActionsPtr ExpressionAnalyzer::getConstActions() { ExpressionActionsPtr actions = new ExpressionActions(NamesAndTypesList(), settings); - + getRootActionsImpl(ast, true, true, *actions); - + return actions; } @@ -1187,21 +1248,32 @@ void ExpressionAnalyzer::removeUnusedColumns() { NamesSet required; NamesSet ignored; + if (select_query && select_query->array_join_expression_list) + { + ASTs & expressions = select_query->array_join_expression_list->children; + for (size_t i = 0; i < expressions.size(); ++i) + { + /// Игнорируем идентификаторы верхнего уровня из секции ARRAY JOIN. + /// Они будут добавлены там, где они используются. + if (dynamic_cast(&*expressions[i])) + ignored.insert(expressions[i]->getColumnName()); + } + } getRequiredColumnsImpl(ast, required, ignored); - + /// Нужно прочитать хоть один столбец, чтобы узнать количество строк. if (required.empty()) required.insert(ExpressionActions::getSmallestColumn(columns)); - + unknown_required_columns = required; - + for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end();) { NamesAndTypesList::iterator it0 = it; ++it; - + unknown_required_columns.erase(it0->first); - + if (!required.count(it0->first)) { required.erase(it0->first); @@ -1214,13 +1286,15 @@ Names ExpressionAnalyzer::getRequiredColumns() { if (!unknown_required_columns.empty()) throw Exception("Unknown identifier: " + *unknown_required_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); - + Names res; for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it) res.push_back(it->first); return res; } + + void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names) { if (ASTIdentifier * node = dynamic_cast(&*ast)) @@ -1228,25 +1302,31 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_ if (node->kind == ASTIdentifier::Column && !ignored_names.count(node->name)) { if (isArrayJoinedColumnName(node->name)) - required_columns.insert(getOriginalNestedName(node->name)); + { + String original = getOriginalNestedName(node->name); + if (!original.empty()) + required_columns.insert(original); + } else + { required_columns.insert(node->name); + } } return; } - + if (ASTFunction * node = dynamic_cast(&*ast)) { if (node->kind == ASTFunction::LAMBDA_EXPRESSION) { if (node->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - + ASTFunction * lambda_args_tuple = dynamic_cast(&*node->arguments->children[0]); - + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - + /// Не нужно добавлять параметры лямбда-выражения в required_columns. Names added_ignored; for (size_t i = 0 ; i < lambda_args_tuple->arguments->children.size(); ++i) @@ -1261,22 +1341,22 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_ added_ignored.push_back(name); } } - + getRequiredColumnsImpl(node->arguments->children[1], required_columns, ignored_names); - + for (size_t i = 0; i < added_ignored.size(); ++i) ignored_names.erase(added_ignored[i]); - + return; } } - + for (size_t i = 0; i < ast->children.size(); ++i) { ASTPtr child = ast->children[i]; if (!dynamic_cast(&*child) && !dynamic_cast(&*child)) getRequiredColumnsImpl(child, required_columns, ignored_names); - } + } } } diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index cd776cc811a..26a1aee7eac 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -106,25 +106,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex return false; } - /// ARRAY JOIN array|Nested_table [AS alias] + /// ARRAY JOIN expr list if (s_array.ignore(pos, end, expected)) { ws.ignore(pos, end); - + if (!s_join.ignore(pos, end, expected)) return false; - + ws.ignore(pos, end); - - ParserWithOptionalAlias ident(new ParserCompoundIdentifier()); - if (!ident.parse(pos, end, select_query->array_join_identifier, expected)) + + if (!exp_list.parse(pos, end, select_query->array_join_expression_list, expected)) return false; - - dynamic_cast(*select_query->array_join_identifier).kind = ASTIdentifier::ArrayJoin; - + ws.ignore(pos, end); } - + /// FINAL if (s_final.ignore(pos, end, expected)) { @@ -248,8 +245,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex select_query->children.push_back(select_query->database); if (select_query->table) select_query->children.push_back(select_query->table); - if (select_query->array_join_identifier) - select_query->children.push_back(select_query->array_join_identifier); + if (select_query->array_join_expression_list) + select_query->children.push_back(select_query->array_join_expression_list); if (select_query->sample_size) select_query->children.push_back(select_query->sample_size); if (select_query->where_expression) diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index d7ae39c51f0..78102b955ee 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -112,10 +112,10 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo formatAST(*ast.table, s, indent, hilite, one_line); } - if (ast.array_join_identifier) + if (ast.array_join_expression_list) { s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : ""); - formatAST(*ast.array_join_identifier, s, indent, hilite, one_line); + formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line); } if (ast.final)