mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 17:44:23 +00:00
Added support for multiple expressions in ARRAY JOIN section. There are known issues. Sorry for automatic indentation changes. [#CONV-8821]
This commit is contained in:
parent
566a11698e
commit
1cab77c753
@ -211,6 +211,8 @@ namespace ErrorCodes
|
||||
NO_FREE_CONNECTION,
|
||||
CANNOT_FSYNC,
|
||||
NESTED_TYPE_TOO_DEEP,
|
||||
ALIAS_REQUIRED,
|
||||
AMBIGUOUS_IDENTIFIER,
|
||||
|
||||
POCO_EXCEPTION = 1000,
|
||||
STD_EXCEPTION,
|
||||
|
@ -36,9 +36,8 @@ public:
|
||||
ADD_COLUMN,
|
||||
REMOVE_COLUMN,
|
||||
COPY_COLUMN,
|
||||
ARRAY_JOIN, /// Заменяет столбец с массивом на столбец с элементами. Если этот массив упоминается где-то еще, будет ошибка.
|
||||
ARRAY_JOIN, /// Заменяет указанные столбцы с массивами на столбцы с элементами. Размножает значения в остальных столбцах по количеству элементов в массивах. Массивы должны быть параллельными (иметь одинаковые длины).
|
||||
PROJECT, /// Переупорядочить и переименовать столбцы, удалить лишние. Допускаются одинаковые имена столбцов в результате.
|
||||
MULTIPLE_ARRAY_JOIN, /// Заменяет столбцы из вложенной таблицы (или один столбец-массив) на столбцы с элементами.
|
||||
};
|
||||
|
||||
Type type;
|
||||
@ -47,10 +46,8 @@ public:
|
||||
std::string result_name;
|
||||
DataTypePtr result_type;
|
||||
|
||||
/// Для MULTIPLE_ARRAY_JOIN
|
||||
std::string nested_table_name;
|
||||
std::string nested_table_alias;
|
||||
NameSet array_joined_columns; /// Имена столбцов без префикса 'NestedTableName.'
|
||||
/// Для ARRAY_JOIN
|
||||
NameSet array_joined_columns;
|
||||
|
||||
/// Для ADD_COLUMN.
|
||||
ColumnPtr added_column;
|
||||
@ -111,23 +108,12 @@ public:
|
||||
return a;
|
||||
}
|
||||
|
||||
static Action arrayJoin(const std::string & source_name, const std::string & result_name)
|
||||
static Action arrayJoin(const NameSet & array_joined_columns)
|
||||
{
|
||||
if (array_joined_columns.empty())
|
||||
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
|
||||
Action a;
|
||||
a.type = ARRAY_JOIN;
|
||||
a.source_name = source_name;
|
||||
a.result_name = result_name;
|
||||
return a;
|
||||
}
|
||||
|
||||
static Action multipleArrayJoin(const std::string & nested_table_name,
|
||||
const std::string & nested_table_alias,
|
||||
const NameSet & array_joined_columns)
|
||||
{
|
||||
Action a;
|
||||
a.type = MULTIPLE_ARRAY_JOIN;
|
||||
a.nested_table_name = nested_table_name;
|
||||
a.nested_table_alias = nested_table_alias;
|
||||
a.array_joined_columns = array_joined_columns;
|
||||
return a;
|
||||
}
|
||||
@ -141,9 +127,6 @@ public:
|
||||
private:
|
||||
friend class ExpressionActions;
|
||||
|
||||
/// Проверяет является ли данный столбец результатом ARRAY JOIN
|
||||
bool isArrayJoinedColumnName(const String & name) const;
|
||||
|
||||
std::vector<Action> getPrerequisites(Block & sample_block);
|
||||
void prepare(Block & sample_block);
|
||||
void execute(Block & block) const;
|
||||
|
@ -94,9 +94,11 @@ private:
|
||||
|
||||
/// Исходные столбцы.
|
||||
NamesAndTypesList columns;
|
||||
/// Столбцы после агрегации. Если нет агрегации, совпадает с columns.
|
||||
/// Столбцы после ARRAY JOIN. Если нет ARRAY JOIN, совпадает с columns.
|
||||
NamesAndTypesList columns_after_array_join;
|
||||
/// Столбцы после агрегации. Если нет агрегации, совпадает с columns_after_array_join.
|
||||
NamesAndTypesList aggregated_columns;
|
||||
|
||||
|
||||
/// Таблица, из которой делается запрос. Используется для sign-rewrite'а
|
||||
const StoragePtr storage;
|
||||
/// Имя поля Sign в таблице. Непусто, если нужно осуществлять sign-rewrite
|
||||
@ -113,7 +115,7 @@ private:
|
||||
typedef std::map<ASTPtr, ASTPtr> MapOfASTs;
|
||||
|
||||
/// Столбцы, которые должны быть преобразованы из-за секции ARRAY JOIN
|
||||
NameSet array_joined_columns;
|
||||
NameSet columns_for_array_join;
|
||||
|
||||
/** Для getActionsImpl.
|
||||
* Стек из ExpressionActions, соответствующих вложенным лямбда-выражениям.
|
||||
@ -223,6 +225,7 @@ private:
|
||||
|
||||
/// Проверяет является ли данный столбец результатом ARRAY JOIN
|
||||
bool isArrayJoinedColumnName(const String & name);
|
||||
|
||||
/// Возвращает исходное имя столбца до применения к нему ARRAY JOIN
|
||||
String getOriginalNestedName(const String & name);
|
||||
|
||||
@ -230,7 +233,7 @@ private:
|
||||
|
||||
/** Создать словарь алиасов.
|
||||
*/
|
||||
void createAliasesDict(ASTPtr & ast);
|
||||
void createAliasesDict(ASTPtr & ast, int ignore_levels = 0);
|
||||
|
||||
/** Для узлов-звёздочек - раскрыть их в список всех столбцов.
|
||||
* Для узлов-литералов - подставить алиасы.
|
||||
@ -255,7 +258,7 @@ private:
|
||||
/// Добавить агрегатные функции в aggregate_descriptions.
|
||||
/// Установить has_aggregation=true, если есть хоть одна агрегатная функция.
|
||||
void getAggregatesImpl(ASTPtr ast, ExpressionActions & actions);
|
||||
|
||||
|
||||
void getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names);
|
||||
|
||||
/// Получить таблицу, из которой идет запрос
|
||||
|
@ -18,7 +18,6 @@ public:
|
||||
Database,
|
||||
Table,
|
||||
Format,
|
||||
ArrayJoin,
|
||||
};
|
||||
|
||||
/// имя
|
||||
|
@ -17,7 +17,7 @@ public:
|
||||
ASTPtr select_expression_list;
|
||||
ASTPtr database;
|
||||
ASTPtr table; /// Идентификатор или подзапрос (рекурсивно ASTSelectQuery)
|
||||
ASTPtr array_join_identifier;
|
||||
ASTPtr array_join_expression_list;
|
||||
bool final;
|
||||
ASTPtr sample_size;
|
||||
ASTPtr where_expression;
|
||||
@ -42,7 +42,7 @@ public:
|
||||
if (select_expression_list) { res->select_expression_list = select_expression_list->clone(); res->children.push_back(res->select_expression_list); }
|
||||
if (database) { res->database = database->clone(); res->children.push_back(res->database); }
|
||||
if (table) { res->table = table->clone(); res->children.push_back(res->table); }
|
||||
if (array_join_identifier) { res->array_join_identifier = array_join_identifier->clone(); res->children.push_back(res->array_join_identifier); }
|
||||
if (array_join_expression_list) { res->array_join_expression_list = array_join_expression_list->clone(); res->children.push_back(res->array_join_expression_list); }
|
||||
if (sample_size) { res->sample_size = sample_size->clone(); res->children.push_back(res->sample_size); }
|
||||
if (where_expression) { res->where_expression = where_expression->clone(); res->children.push_back(res->where_expression); }
|
||||
if (group_expression_list) { res->group_expression_list = group_expression_list->clone(); res->children.push_back(res->group_expression_list); }
|
||||
|
@ -26,11 +26,6 @@ Names ExpressionActions::Action::getNeededColumns() const
|
||||
return res;
|
||||
}
|
||||
|
||||
bool ExpressionActions::Action::isArrayJoinedColumnName(const String & name) const
|
||||
{
|
||||
return array_joined_columns.count(name) != 0;
|
||||
}
|
||||
|
||||
ExpressionActions::Action ExpressionActions::Action::applyFunction(FunctionPtr function_,
|
||||
const std::vector<std::string> & argument_names_,
|
||||
std::string result_name_)
|
||||
@ -138,48 +133,14 @@ void ExpressionActions::Action::prepare(Block & sample_block)
|
||||
}
|
||||
else if (type == ARRAY_JOIN)
|
||||
{
|
||||
if (sample_block.has(result_name))
|
||||
throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
|
||||
if (!sample_block.has(source_name))
|
||||
throw Exception("Unknown identifier: '" + source_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
|
||||
const DataTypeArray * array_type = dynamic_cast<const DataTypeArray *>(&*sample_block.getByName(source_name).type);
|
||||
if (!array_type)
|
||||
throw Exception("arrayJoin requires array argument", ErrorCodes::TYPE_MISMATCH);
|
||||
result_type = array_type->getNestedType();
|
||||
|
||||
sample_block.erase(source_name);
|
||||
sample_block.insert(ColumnWithNameAndType(NULL, result_type, result_name));
|
||||
}
|
||||
else if (type == MULTIPLE_ARRAY_JOIN)
|
||||
{
|
||||
bool has_arrays_to_join = false;
|
||||
|
||||
size_t columns = sample_block.columns();
|
||||
for (size_t i = 0; i < columns; ++i)
|
||||
for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
||||
{
|
||||
const ColumnWithNameAndType & current = sample_block.getByPosition(i);
|
||||
ColumnWithNameAndType & current = sample_block.getByName(*it);
|
||||
const DataTypeArray * array_type = dynamic_cast<const DataTypeArray *>(&*current.type);
|
||||
|
||||
if (isArrayJoinedColumnName(current.name))
|
||||
{
|
||||
if (!array_type)
|
||||
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
has_arrays_to_join = true;
|
||||
|
||||
ColumnWithNameAndType result;
|
||||
result.column = NULL;
|
||||
result.type = array_type->getNestedType();
|
||||
result.name = current.name;
|
||||
|
||||
sample_block.erase(i);
|
||||
sample_block.insert(i, result);
|
||||
}
|
||||
if (!array_type)
|
||||
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
|
||||
current.type = array_type->getNestedType();
|
||||
}
|
||||
|
||||
if (!has_arrays_to_join)
|
||||
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
else if (type == ADD_COLUMN)
|
||||
{
|
||||
@ -199,11 +160,11 @@ void ExpressionActions::Action::prepare(Block & sample_block)
|
||||
|
||||
void ExpressionActions::Action::execute(Block & block) const
|
||||
{
|
||||
if (type == REMOVE_COLUMN || type == COPY_COLUMN || type == ARRAY_JOIN)
|
||||
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
|
||||
if (!block.has(source_name))
|
||||
throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION || type == ARRAY_JOIN)
|
||||
if (type == ADD_COLUMN || type == COPY_COLUMN || type == APPLY_FUNCTION)
|
||||
if (block.has(result_name))
|
||||
throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
|
||||
|
||||
@ -238,60 +199,41 @@ void ExpressionActions::Action::execute(Block & block) const
|
||||
}
|
||||
|
||||
case ARRAY_JOIN:
|
||||
case MULTIPLE_ARRAY_JOIN:
|
||||
{
|
||||
ColumnPtr any_array_ptr = NULL;
|
||||
|
||||
size_t columns = block.columns();
|
||||
for (size_t i = 0; i < columns; ++i)
|
||||
{
|
||||
const ColumnWithNameAndType & current = block.getByPosition(i);
|
||||
|
||||
if (current.name == source_name || isArrayJoinedColumnName(current.name))
|
||||
{
|
||||
if (!dynamic_cast<const DataTypeArray *>(&*current.type))
|
||||
throw Exception("arrayJoin of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
ColumnPtr array_ptr = current.column;
|
||||
if (array_ptr->isConst())
|
||||
array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();
|
||||
|
||||
if (any_array_ptr.isNull())
|
||||
any_array_ptr = array_ptr;
|
||||
else
|
||||
{
|
||||
const ColumnArray & array = dynamic_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!array.hasEqualOffsets(dynamic_cast<const ColumnArray &>(*any_array_ptr)))
|
||||
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (any_array_ptr.isNull())
|
||||
if (array_joined_columns.empty())
|
||||
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column;
|
||||
if (any_array_ptr->isConst())
|
||||
any_array_ptr = dynamic_cast<const IColumnConst &>(*any_array_ptr).convertToFullColumn();
|
||||
const ColumnArray * any_array = dynamic_cast<const ColumnArray *>(&*any_array_ptr);
|
||||
|
||||
if (!any_array)
|
||||
throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
size_t columns = block.columns();
|
||||
for (size_t i = 0; i < columns; ++i)
|
||||
{
|
||||
ColumnWithNameAndType & current = block.getByPosition(i);
|
||||
|
||||
if (current.name == source_name || isArrayJoinedColumnName(current.name))
|
||||
if (array_joined_columns.count(current.name))
|
||||
{
|
||||
if (!dynamic_cast<const DataTypeArray *>(&*current.type))
|
||||
throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
ColumnPtr array_ptr = current.column;
|
||||
if (array_ptr->isConst())
|
||||
array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();
|
||||
|
||||
ColumnWithNameAndType result;
|
||||
result.column = dynamic_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
result.type = dynamic_cast<const DataTypeArray &>(*current.type).getNestedType();
|
||||
result.name = type == MULTIPLE_ARRAY_JOIN ? current.name : result_name;
|
||||
|
||||
block.erase(i);
|
||||
block.insert(i, result);
|
||||
const ColumnArray & array = dynamic_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!array.hasEqualOffsets(dynamic_cast<const ColumnArray &>(*any_array_ptr)))
|
||||
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
|
||||
|
||||
current.column = dynamic_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
current.type = dynamic_cast<const DataTypeArray &>(*current.type).getNestedType();
|
||||
}
|
||||
else
|
||||
{
|
||||
current.column = current.column->replicate(any_array->getOffsets());
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
@ -358,18 +300,13 @@ std::string ExpressionActions::Action::toString() const
|
||||
ss << " )";
|
||||
break;
|
||||
case ARRAY_JOIN:
|
||||
ss << result_name << "(" << result_type->getName() << ")" << "= " << "arrayJoin" << " ( " << source_name << " )";
|
||||
break;
|
||||
case MULTIPLE_ARRAY_JOIN:
|
||||
ss << "ARRAY JOIN " << nested_table_name << (nested_table_name != nested_table_alias ? " AS " + nested_table_alias : "");
|
||||
ss << "{";
|
||||
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
||||
ss << "ARRAY JOIN ";
|
||||
for (NameSet::iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
||||
{
|
||||
if (it != array_joined_columns.begin())
|
||||
ss << ", ";
|
||||
ss << *it;
|
||||
}
|
||||
ss << "}";
|
||||
break;
|
||||
case PROJECT:
|
||||
ss << "{";
|
||||
@ -532,15 +469,31 @@ void ExpressionActions::finalize(const Names & output_columns)
|
||||
{
|
||||
Action & action = actions[i];
|
||||
Names in = action.getNeededColumns();
|
||||
std::string out = action.result_name;
|
||||
|
||||
|
||||
if (action.type == Action::PROJECT)
|
||||
{
|
||||
needed_columns = NameSet(in.begin(), in.end());
|
||||
unmodified_columns.clear();
|
||||
}
|
||||
else if (action.type == Action::ARRAY_JOIN)
|
||||
{
|
||||
/// Не будем ARRAY JOIN-ить столбцы, которые дальше не используются.
|
||||
/// Обычно такие столбцы не используются и до ARRAY JOIN, и поэтому выбрасываются дальше в этой функции.
|
||||
/// Не будем убирать все столбцы, чтобы не потерять количество строк.
|
||||
NameSet::iterator it = action.array_joined_columns.begin();
|
||||
while (it != action.array_joined_columns.end() && action.array_joined_columns.size() > 1)
|
||||
{
|
||||
NameSet::iterator jt = it;
|
||||
++it;
|
||||
if (!needed_columns.count(*jt))
|
||||
{
|
||||
action.array_joined_columns.erase(jt);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string out = action.result_name;
|
||||
if (!out.empty())
|
||||
{
|
||||
/// Если результат не используется и нет побочных эффектов, выбросим действие.
|
||||
@ -605,9 +558,9 @@ std::string ExpressionActions::getID() const
|
||||
{
|
||||
if (i)
|
||||
ss << ", ";
|
||||
if (actions[i].type == Action::APPLY_FUNCTION || actions[i].type == Action::ARRAY_JOIN)
|
||||
if (actions[i].type == Action::APPLY_FUNCTION)
|
||||
ss << actions[i].result_name;
|
||||
if (actions[i].type == Action::MULTIPLE_ARRAY_JOIN)
|
||||
if (actions[i].type == Action::ARRAY_JOIN)
|
||||
{
|
||||
ss << "{";
|
||||
for (NameSet::const_iterator it = actions[i].array_joined_columns.begin();
|
||||
@ -681,7 +634,7 @@ void ExpressionActions::optimizeArrayJoin()
|
||||
bool depends_on_array_join = false;
|
||||
Names needed;
|
||||
|
||||
if (actions[i].type == Action::ARRAY_JOIN || actions[i].type == Action::MULTIPLE_ARRAY_JOIN)
|
||||
if (actions[i].type == Action::ARRAY_JOIN)
|
||||
{
|
||||
depends_on_array_join = true;
|
||||
needed = actions[i].getNeededColumns();
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -106,25 +106,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
|
||||
return false;
|
||||
}
|
||||
|
||||
/// ARRAY JOIN array|Nested_table [AS alias]
|
||||
/// ARRAY JOIN expr list
|
||||
if (s_array.ignore(pos, end, expected))
|
||||
{
|
||||
ws.ignore(pos, end);
|
||||
|
||||
|
||||
if (!s_join.ignore(pos, end, expected))
|
||||
return false;
|
||||
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
ParserWithOptionalAlias ident(new ParserCompoundIdentifier());
|
||||
if (!ident.parse(pos, end, select_query->array_join_identifier, expected))
|
||||
|
||||
if (!exp_list.parse(pos, end, select_query->array_join_expression_list, expected))
|
||||
return false;
|
||||
|
||||
dynamic_cast<ASTIdentifier &>(*select_query->array_join_identifier).kind = ASTIdentifier::ArrayJoin;
|
||||
|
||||
|
||||
ws.ignore(pos, end);
|
||||
}
|
||||
|
||||
|
||||
/// FINAL
|
||||
if (s_final.ignore(pos, end, expected))
|
||||
{
|
||||
@ -248,8 +245,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
|
||||
select_query->children.push_back(select_query->database);
|
||||
if (select_query->table)
|
||||
select_query->children.push_back(select_query->table);
|
||||
if (select_query->array_join_identifier)
|
||||
select_query->children.push_back(select_query->array_join_identifier);
|
||||
if (select_query->array_join_expression_list)
|
||||
select_query->children.push_back(select_query->array_join_expression_list);
|
||||
if (select_query->sample_size)
|
||||
select_query->children.push_back(select_query->sample_size);
|
||||
if (select_query->where_expression)
|
||||
|
@ -112,10 +112,10 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo
|
||||
formatAST(*ast.table, s, indent, hilite, one_line);
|
||||
}
|
||||
|
||||
if (ast.array_join_identifier)
|
||||
if (ast.array_join_expression_list)
|
||||
{
|
||||
s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : "");
|
||||
formatAST(*ast.array_join_identifier, s, indent, hilite, one_line);
|
||||
formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line);
|
||||
}
|
||||
|
||||
if (ast.final)
|
||||
|
Loading…
Reference in New Issue
Block a user