fix ArrayJoin columns calculation

This commit is contained in:
chertus 2019-01-22 20:36:08 +03:00
parent 24fc3ad544
commit 97199b9712
4 changed files with 67 additions and 36 deletions

View File

@ -15,12 +15,22 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ALIAS_REQUIRED;
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
extern const int LOGICAL_ERROR;
}
/// Fills array_join_result_to_source: which array columns to replicate, and what to name them afterwards.
class ArrayJoinedColumnsMatcher
{
public:
struct Data
{
using Aliases = std::unordered_map<String, ASTPtr>;
const Aliases & aliases;
NameToNameMap & array_join_name_to_alias;
NameToNameMap & array_join_alias_to_name;
NameToNameMap & array_join_result_to_source;
@ -30,10 +40,6 @@ public:
static bool needChildVisit(ASTPtr & node, const ASTPtr & child)
{
/// Processed
if (typeid_cast<ASTIdentifier *>(node.get()))
return false;
if (typeid_cast<ASTTablesInSelectQuery *>(node.get()))
return false;
@ -48,10 +54,41 @@ public:
{
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
visit(*t, ast, data);
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
return visit(*t, ast, data);
return {};
}
private:
static std::vector<ASTPtr *> visit(const ASTSelectQuery & node, ASTPtr &, Data & data)
{
ASTPtr array_join_expression_list = node.array_join_expression_list();
if (!array_join_expression_list)
throw Exception("Logical error: no ARRAY JOIN", ErrorCodes::LOGICAL_ERROR);
std::vector<ASTPtr *> out;
out.reserve(array_join_expression_list->children.size());
for (ASTPtr & ast : array_join_expression_list->children)
{
const String nested_table_name = ast->getColumnName();
const String nested_table_alias = ast->getAliasOrColumnName();
if (nested_table_alias == nested_table_name && !isIdentifier(ast))
throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);
if (data.array_join_alias_to_name.count(nested_table_alias) || data.aliases.count(nested_table_alias))
throw Exception("Duplicate alias in ARRAY JOIN: " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
data.array_join_alias_to_name[nested_table_alias] = nested_table_name;
data.array_join_name_to_alias[nested_table_name] = nested_table_alias;
out.emplace_back(&ast);
}
return out;
}
static void visit(const ASTIdentifier & node, ASTPtr &, Data & data)
{
NameToNameMap & array_join_name_to_alias = data.array_join_name_to_alias;

View File

@ -35,8 +35,6 @@ namespace DB
namespace ErrorCodes
{
extern const int ALIAS_REQUIRED;
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
extern const int EMPTY_NESTED_TABLE;
extern const int LOGICAL_ERROR;
extern const int INVALID_JOIN_ON_EXPRESSION;
@ -434,33 +432,13 @@ void optimizeUsing(const ASTSelectQuery * select_query)
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const Names & source_columns, const NameSet & source_columns_set)
{
ASTPtr array_join_expression_list = select_query->array_join_expression_list();
if (array_join_expression_list)
if (ASTPtr array_join_expression_list = select_query->array_join_expression_list())
{
ASTs & array_join_asts = array_join_expression_list->children;
for (const auto & ast : array_join_asts)
{
const String nested_table_name = ast->getColumnName();
const String nested_table_alias = ast->getAliasOrColumnName();
if (nested_table_alias == nested_table_name && !isIdentifier(ast))
throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name,
ErrorCodes::ALIAS_REQUIRED);
if (result.array_join_alias_to_name.count(nested_table_alias) || result.aliases.count(nested_table_alias))
throw Exception("Duplicate alias in ARRAY JOIN: " + nested_table_alias,
ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
result.array_join_alias_to_name[nested_table_alias] = nested_table_name;
result.array_join_name_to_alias[nested_table_name] = nested_table_alias;
}
{
ArrayJoinedColumnsVisitor::Data visitor_data{result.array_join_name_to_alias,
result.array_join_alias_to_name,
result.array_join_result_to_source};
ArrayJoinedColumnsVisitor(visitor_data).visit(query);
}
ArrayJoinedColumnsVisitor::Data visitor_data{result.aliases,
result.array_join_name_to_alias,
result.array_join_alias_to_name,
result.array_join_result_to_source};
ArrayJoinedColumnsVisitor(visitor_data).visit(query);
/// If the result of ARRAY JOIN is not used, it is necessary to ARRAY-JOIN any column,
/// to get the correct number of rows.

View File

@ -16,7 +16,6 @@ struct SyntaxAnalyzerResult
NamesAndTypesList source_columns;
/// Note: used only in tests.
using Aliases = std::unordered_map<String, ASTPtr>;
Aliases aliases;

View File

@ -1,5 +1,22 @@
CREATE TABLE IF NOT EXISTS test.sign (Sign Int8, Arr Array(Int8)) ENGINE = Memory;
DROP TABLE IF EXISTS test.visits;
CREATE TABLE test.visits
(
Sign Int8,
Arr Array(Int8),
`ParsedParams.Key1` Array(String),
`ParsedParams.Key2` Array(String),
CounterID UInt32
) ENGINE = Memory;
SELECT arrayMap(x -> x * Sign, Arr) FROM test.sign;
SELECT arrayMap(x -> x * Sign, Arr) FROM test.visits;
DROP TABLE test.sign;
SELECT PP.Key2 AS `ym:s:pl2`
FROM test.visits
ARRAY JOIN
`ParsedParams.Key2` AS `PP.Key2`,
`ParsedParams.Key1` AS `PP.Key1`,
arrayEnumerateUniq(`ParsedParams.Key2`, arrayMap(x_0 -> 1, `ParsedParams.Key1`)) AS `upp_==_yes_`,
arrayEnumerateUniq(`ParsedParams.Key2`) AS _uniq_ParsedParams
WHERE CounterID = 100500;
DROP TABLE test.visits;