mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
ClickHouse: fixed and claened up ARRAY JOIN a little. [#CONV-8821]
This commit is contained in:
parent
20da4e0a2b
commit
491480e79c
@ -213,6 +213,7 @@ namespace ErrorCodes
|
||||
NESTED_TYPE_TOO_DEEP,
|
||||
ALIAS_REQUIRED,
|
||||
AMBIGUOUS_IDENTIFIER,
|
||||
EMPTY_NESTED_TABLE,
|
||||
SOCKET_TIMEOUT,
|
||||
NETWORK_ERROR,
|
||||
|
||||
|
@ -114,8 +114,13 @@ private:
|
||||
typedef std::set<const IAST *> SetOfASTs;
|
||||
typedef std::map<ASTPtr, ASTPtr> MapOfASTs;
|
||||
|
||||
/// Столбцы, которые должны быть преобразованы из-за секции ARRAY JOIN
|
||||
NameSet columns_for_array_join;
|
||||
/// Какой столбец нужно по-ARRAY-JOIN-ить, чтобы получить указанный.
|
||||
/// Например, для SELECT s.v ... ARRAY JOIN a AS s сюда попадет "s.v"->"a.v".
|
||||
NameToNameMap array_join_result_to_source;
|
||||
|
||||
/// Для секции ARRAY JOIN отображение из алиаса в полное столбца
|
||||
/// Например, для ARRAY JOIN [1,2] AS b сюда попадет "b"->"array(1,2)".
|
||||
NameToNameMap array_join_alias_to_name;
|
||||
|
||||
/** Для getActionsImpl.
|
||||
* Стек из ExpressionActions, соответствующих вложенным лямбда-выражениям.
|
||||
@ -223,12 +228,6 @@ private:
|
||||
NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols);
|
||||
NamesAndTypesList::iterator findColumn(const String & name) { return findColumn(name, columns); }
|
||||
|
||||
/// Проверяет является ли данный столбец результатом ARRAY JOIN
|
||||
bool isArrayJoinedColumnName(const String & name);
|
||||
|
||||
/// Возвращает исходное имя столбца до применения к нему ARRAY JOIN
|
||||
String getOriginalNestedName(const String & name);
|
||||
|
||||
void removeUnusedColumns();
|
||||
|
||||
/** Создать словарь алиасов.
|
||||
|
@ -225,7 +225,7 @@ void ExpressionActions::Action::execute(Block & block) const
|
||||
|
||||
const ColumnArray & array = dynamic_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!array.hasEqualOffsets(dynamic_cast<const ColumnArray &>(*any_array_ptr)))
|
||||
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
|
||||
throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
|
||||
|
||||
current.column = dynamic_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
current.type = dynamic_cast<const DataTypeArray &>(*current.type).getNestedType();
|
||||
|
@ -78,18 +78,16 @@ void ExpressionAnalyzer::init()
|
||||
createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true.
|
||||
normalizeTree();
|
||||
|
||||
removeUnusedColumns();
|
||||
|
||||
getArrayJoinedColumns();
|
||||
|
||||
removeUnusedColumns();
|
||||
|
||||
/// Найдем агрегатные функции.
|
||||
if (select_query && (select_query->group_expression_list || select_query->having_expression))
|
||||
has_aggregation = true;
|
||||
|
||||
ExpressionActions temp_actions(columns, settings);
|
||||
|
||||
columns_after_array_join = columns;
|
||||
|
||||
if (select_query && select_query->array_join_expression_list)
|
||||
{
|
||||
const ASTs & array_join_asts = select_query->array_join_expression_list->children;
|
||||
@ -102,13 +100,16 @@ void ExpressionAnalyzer::init()
|
||||
addMultipleArrayJoinAction(temp_actions);
|
||||
|
||||
const Block & temp_sample = temp_actions.getSampleBlock();
|
||||
for (size_t i = 0; i < temp_sample.columns(); ++i)
|
||||
for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
|
||||
{
|
||||
const ColumnWithNameAndType & col = temp_sample.getByPosition(i);
|
||||
if (isArrayJoinedColumnName(col.name))
|
||||
columns_after_array_join.push_back(NameAndTypePair(col.name, col.type));
|
||||
columns_after_array_join.push_back(NameAndTypePair(it->first, temp_sample.getByName(it->first).type));
|
||||
}
|
||||
}
|
||||
for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
|
||||
{
|
||||
if (!array_join_result_to_source.count(it->first))
|
||||
columns_after_array_join.push_back(*it);
|
||||
}
|
||||
getAggregatesImpl(ast, temp_actions);
|
||||
|
||||
if (has_aggregation)
|
||||
@ -159,57 +160,8 @@ NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name,
|
||||
}
|
||||
|
||||
|
||||
bool ExpressionAnalyzer::isArrayJoinedColumnName(const String & name)
|
||||
{
|
||||
if (select_query && select_query->array_join_expression_list)
|
||||
{
|
||||
ASTs & expressions = select_query->array_join_expression_list->children;
|
||||
int count = 0;
|
||||
String table_name = DataTypeNested::extractNestedTableName(name);
|
||||
for (size_t i = 0; i < expressions.size(); ++i)
|
||||
{
|
||||
String alias = expressions[i]->getAlias();
|
||||
if (name == alias || table_name == alias)
|
||||
++count;
|
||||
}
|
||||
if (count > 1)
|
||||
throw Exception("Ambiguous identifier from ARRAY JOIN: " + name, ErrorCodes::AMBIGUOUS_IDENTIFIER);
|
||||
return count == 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
String ExpressionAnalyzer::getOriginalNestedName(const String & name)
|
||||
{
|
||||
if (select_query && select_query->array_join_expression_list)
|
||||
{
|
||||
ASTs & expressions = select_query->array_join_expression_list->children;
|
||||
String table_name = DataTypeNested::extractNestedTableName(name);
|
||||
for (size_t i = 0; i < expressions.size(); ++i)
|
||||
{
|
||||
String expression_name = expressions[i]->getColumnName();
|
||||
String alias = expressions[i]->getAlias();
|
||||
bool is_identifier = !!dynamic_cast<ASTIdentifier *>(&*expressions[i]);
|
||||
if (name == alias)
|
||||
{
|
||||
if (is_identifier)
|
||||
return expression_name;
|
||||
else
|
||||
return "";
|
||||
}
|
||||
else if (table_name == alias)
|
||||
{
|
||||
String nested_column = DataTypeNested::extractNestedColumnName(name);
|
||||
return DataTypeNested::concatenateNestedName(expression_name, nested_column);
|
||||
}
|
||||
}
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
/// ignore_levels - алиасы в скольки верхних уровнях поддерева нужно игнорировать.
|
||||
/// Например, при ignore_levels=1 ast не может быть занесен в словарь, но его дети могут.
|
||||
void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels)
|
||||
{
|
||||
ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);
|
||||
@ -218,7 +170,8 @@ void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels)
|
||||
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
|
||||
{
|
||||
int new_ignore_levels = std::max(0, ignore_levels - 1);
|
||||
/// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем.
|
||||
/// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем
|
||||
/// (пропустим сам expression list и его детей).
|
||||
if (select && *it == select->array_join_expression_list)
|
||||
new_ignore_levels = 2;
|
||||
if (!dynamic_cast<ASTSelectQuery *>(&**it))
|
||||
@ -657,15 +610,33 @@ void ExpressionAnalyzer::getArrayJoinedColumns()
|
||||
{
|
||||
if (select_query && select_query->array_join_expression_list)
|
||||
{
|
||||
getArrayJoinedColumnsImpl(select_query->group_expression_list);
|
||||
getArrayJoinedColumnsImpl(select_query->select_expression_list);
|
||||
getArrayJoinedColumnsImpl(select_query->where_expression);
|
||||
getArrayJoinedColumnsImpl(select_query->having_expression);
|
||||
getArrayJoinedColumnsImpl(select_query->order_expression_list);
|
||||
ASTs & array_join_asts = select_query->array_join_expression_list->children;
|
||||
for (size_t i = 0; i < array_join_asts .size(); ++i)
|
||||
{
|
||||
ASTPtr ast = array_join_asts [i];
|
||||
|
||||
String nested_table_name = ast->getColumnName();
|
||||
String nested_table_alias = ast->getAlias();
|
||||
if (nested_table_alias == nested_table_name && !dynamic_cast<ASTIdentifier *>(&*ast))
|
||||
throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);
|
||||
|
||||
if (array_join_alias_to_name.count(nested_table_alias) || aliases.count(nested_table_alias))
|
||||
throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
|
||||
array_join_alias_to_name[nested_table_alias] = nested_table_name;
|
||||
}
|
||||
|
||||
ASTs & query_asts = select_query->children;
|
||||
for (size_t i = 0; i < query_asts.size(); ++i)
|
||||
{
|
||||
ASTPtr ast = query_asts[i];
|
||||
if (select_query && ast == select_query->array_join_expression_list)
|
||||
continue;
|
||||
getArrayJoinedColumnsImpl(ast);
|
||||
}
|
||||
|
||||
/// Если результат ARRAY JOIN не используется, придется все равно по-ARRAY-JOIN-ить какой-нибудь столбец,
|
||||
/// чтобы получить правильное количество строк.
|
||||
if (columns_for_array_join.empty())
|
||||
if (array_join_result_to_source.empty())
|
||||
{
|
||||
ASTPtr expr = select_query->array_join_expression_list->children[0];
|
||||
String source_name = expr->getColumnName();
|
||||
@ -673,7 +644,7 @@ void ExpressionAnalyzer::getArrayJoinedColumns()
|
||||
/// Это массив.
|
||||
if (!dynamic_cast<ASTIdentifier *>(&*expr) || findColumn(source_name, columns) != columns.end())
|
||||
{
|
||||
columns_for_array_join.insert(result_name);
|
||||
array_join_result_to_source[result_name] = source_name;
|
||||
}
|
||||
else /// Это вложенная таблица.
|
||||
{
|
||||
@ -684,11 +655,14 @@ void ExpressionAnalyzer::getArrayJoinedColumns()
|
||||
String column_name = DataTypeNested::extractNestedColumnName(it->first);
|
||||
if (table_name == source_name)
|
||||
{
|
||||
columns_for_array_join.insert(DataTypeNested::concatenateNestedName(result_name, column_name));
|
||||
array_join_result_to_source[DataTypeNested::concatenateNestedName(result_name, column_name)]
|
||||
= it->first;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -697,13 +671,20 @@ void ExpressionAnalyzer::getArrayJoinedColumns()
|
||||
|
||||
void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast)
|
||||
{
|
||||
if (!ast)
|
||||
return;
|
||||
|
||||
if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
|
||||
{
|
||||
if (node->kind == ASTIdentifier::Column && isArrayJoinedColumnName(node->name))
|
||||
columns_for_array_join.insert(node->name);
|
||||
if (node->kind == ASTIdentifier::Column)
|
||||
{
|
||||
String table_name = DataTypeNested::extractNestedTableName(node->name);
|
||||
if (array_join_alias_to_name.count(node->name))
|
||||
array_join_result_to_source[node->name] = array_join_alias_to_name[node->name];
|
||||
else if (array_join_alias_to_name.count(table_name))
|
||||
{
|
||||
String nested_column = DataTypeNested::extractNestedColumnName(node->name);
|
||||
array_join_result_to_source[node->name]
|
||||
= DataTypeNested::concatenateNestedName(array_join_alias_to_name[table_name], nested_column);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1018,48 +999,15 @@ void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypes
|
||||
|
||||
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions)
|
||||
{
|
||||
ASTs & asts = select_query->array_join_expression_list->children;
|
||||
typedef std::map<String, String> AliasToName;
|
||||
AliasToName alias_to_name;
|
||||
for (size_t i = 0; i < asts.size(); ++i)
|
||||
NameSet result_columns;
|
||||
for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
|
||||
{
|
||||
ASTPtr ast = asts[i];
|
||||
|
||||
String nested_table_name = ast->getColumnName();
|
||||
String nested_table_alias = ast->getAlias();
|
||||
if (nested_table_alias == nested_table_name && !dynamic_cast<ASTIdentifier *>(&*ast))
|
||||
throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);
|
||||
|
||||
if (alias_to_name.count(nested_table_alias) || aliases.count(nested_table_alias))
|
||||
throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
|
||||
alias_to_name[nested_table_alias] = nested_table_name;
|
||||
if (it->first != it->second)
|
||||
actions.add(ExpressionActions::Action::copyColumn(it->second, it->first));
|
||||
result_columns.insert(it->first);
|
||||
}
|
||||
|
||||
for (NamesSet::iterator it = columns_for_array_join.begin(); it != columns_for_array_join.end(); ++it)
|
||||
{
|
||||
const String & result_name = *it;
|
||||
|
||||
String result_table = DataTypeNested::extractNestedTableName(result_name);
|
||||
String result_column = DataTypeNested::extractNestedColumnName(result_name);
|
||||
|
||||
String source_name;
|
||||
if (alias_to_name.count(result_name))
|
||||
{
|
||||
source_name = alias_to_name[result_name];
|
||||
}
|
||||
else if (alias_to_name.count(result_table))
|
||||
{
|
||||
source_name = DataTypeNested::concatenateNestedName(alias_to_name[result_table], result_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("Unexpected result of ARRAY JOIN", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
actions.add(ExpressionActions::Action::copyColumn(source_name, result_name));
|
||||
}
|
||||
|
||||
actions.add(ExpressionActions::Action::arrayJoin(columns_for_array_join));
|
||||
actions.add(ExpressionActions::Action::arrayJoin(result_columns));
|
||||
}
|
||||
|
||||
|
||||
@ -1076,7 +1024,9 @@ bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain)
|
||||
getRootActionsImpl(select_query->array_join_expression_list, false, false, *step.actions);
|
||||
|
||||
addMultipleArrayJoinAction(*step.actions);
|
||||
step.required_output.insert(step.required_output.end(), columns_for_array_join.begin(), columns_for_array_join.end());
|
||||
|
||||
for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
|
||||
step.required_output.push_back(it->first);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1322,19 +1272,34 @@ void ExpressionAnalyzer::removeUnusedColumns()
|
||||
{
|
||||
NamesSet required;
|
||||
NamesSet ignored;
|
||||
|
||||
if (select_query && select_query->array_join_expression_list)
|
||||
{
|
||||
ASTs & expressions = select_query->array_join_expression_list->children;
|
||||
for (size_t i = 0; i < expressions.size(); ++i)
|
||||
{
|
||||
/// Игнорируем идентификаторы верхнего уровня из секции ARRAY JOIN.
|
||||
/// Они будут добавлены там, где они используются.
|
||||
/// Их потом добавим отдельно.
|
||||
if (dynamic_cast<ASTIdentifier *>(&*expressions[i]))
|
||||
ignored.insert(expressions[i]->getColumnName());
|
||||
|
||||
ignored.insert(expressions[i]->getAlias());
|
||||
}
|
||||
}
|
||||
|
||||
getRequiredColumnsImpl(ast, required, ignored);
|
||||
|
||||
NameSet array_join_sources;
|
||||
for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
|
||||
{
|
||||
array_join_sources.insert(it->second);
|
||||
}
|
||||
for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
|
||||
{
|
||||
if (array_join_sources.count(it->first))
|
||||
required.insert(it->first);
|
||||
}
|
||||
|
||||
/// Нужно прочитать хоть один столбец, чтобы узнать количество строк.
|
||||
if (required.empty())
|
||||
required.insert(ExpressionActions::getSmallestColumn(columns));
|
||||
@ -1373,18 +1338,11 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_
|
||||
{
|
||||
if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
|
||||
{
|
||||
if (node->kind == ASTIdentifier::Column && !ignored_names.count(node->name))
|
||||
if (node->kind == ASTIdentifier::Column
|
||||
&& !ignored_names.count(node->name)
|
||||
&& !ignored_names.count(DataTypeNested::extractNestedTableName(node->name)))
|
||||
{
|
||||
if (isArrayJoinedColumnName(node->name))
|
||||
{
|
||||
String original = getOriginalNestedName(node->name);
|
||||
if (!original.empty())
|
||||
required_columns.insert(original);
|
||||
}
|
||||
else
|
||||
{
|
||||
required_columns.insert(node->name);
|
||||
}
|
||||
required_columns.insert(node->name);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(ASTPtr query_ptr_, const Context
|
||||
: InterpreterSelectQuery(query.table, context).getSampleBlock().getColumnsList());
|
||||
|
||||
if (context.getColumns().empty())
|
||||
throw Exception("There is no available columns", ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN);
|
||||
|
||||
query_analyzer = new ExpressionAnalyzer(query_ptr, context, subquery_depth);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user