optimize some functions to subcolumns

This commit is contained in:
Anton Popov 2021-05-22 02:22:22 +03:00
parent 13cfedd188
commit 75adbd8e23
5 changed files with 101 additions and 27 deletions

View File

@ -1,6 +1,7 @@
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h> #include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <DataTypes/NestedUtils.h> #include <DataTypes/NestedUtils.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
namespace DB namespace DB
{ {
@ -8,25 +9,45 @@ namespace DB
namespace namespace
{ {
ASTPtr transformToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name) ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{ {
return std::make_shared<ASTIdentifier>(Nested::concatenateName(identifier.name(), subcolumn_name)); return std::make_shared<ASTIdentifier>(Nested::concatenateName(name_in_storage, subcolumn_name));
} }
ASTPtr transformIsNotNullToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name) ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{ {
auto ast = transformToSubcolumn(identifier, subcolumn_name); auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
ast = makeASTFunction("NOT", ast); return makeASTFunction("equals", ast, std::make_shared<ASTLiteral>(0u));
return ast;
} }
const std::unordered_map<String, std::pair<String, decltype(&transformToSubcolumn)>> function_to_subcolumn = ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{ {
{"length", {"size0", transformToSubcolumn}}, auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
{"isNull", {"null", transformToSubcolumn}}, return makeASTFunction("notEquals", ast, std::make_shared<ASTLiteral>(0u));
{"isNotNull", {"null", transformIsNotNullToSubcolumn}}, }
{"mapKeys", {"keys", transformToSubcolumn}},
{"mapValues", {"values", transformToSubcolumn}} ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("not", ast);
}
ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("sum", makeASTFunction("not", ast));
}
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
{
{"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
{"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},
{"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}},
{"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}},
{"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}},
{"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}},
{"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}},
{"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
}; };
} }
@ -38,15 +59,22 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast)
return; return;
const auto * identifier = arguments[0]->as<ASTIdentifier>(); const auto * identifier = arguments[0]->as<ASTIdentifier>();
if (!identifier || !columns_to_rewrite.count(identifier->name())) if (!identifier)
return; return;
auto it = function_to_subcolumn.find(function.name); auto it = function_to_subcolumn.find(function.name);
if (it == function_to_subcolumn.end()) if (it == function_to_subcolumn.end())
return; return;
const auto & [subcolumn_name, transformer] = it->second; const auto & [type_id, subcolumn_name, transformer] = it->second;
ast = transformer(*identifier, subcolumn_name); const auto & columns = metadata_snapshot->getColumns();
const auto & name_in_storage = identifier->name();
if (columns.has(name_in_storage)
&& columns.get(name_in_storage).type->getTypeId() == type_id)
{
ast = transformer(name_in_storage, subcolumn_name);
}
} }
} }

View File

@ -2,6 +2,7 @@
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Interpreters/InDepthNodeVisitor.h> #include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/StorageInMemoryMetadata.h>
namespace DB namespace DB
{ {
@ -14,7 +15,7 @@ public:
using TypeToVisit = ASTFunction; using TypeToVisit = ASTFunction;
void visit(ASTFunction & function, ASTPtr & ast); void visit(ASTFunction & function, ASTPtr & ast);
const NameSet & columns_to_rewrite; StorageMetadataPtr metadata_snapshot;
}; };
using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>; using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>;

View File

@ -581,9 +581,9 @@ void transformIfStringsIntoEnum(ASTPtr & query)
ConvertStringsToEnumVisitor(convert_data).visit(query); ConvertStringsToEnumVisitor(convert_data).visit(query);
} }
void optimizeFunctionsToSubcolumns(ASTPtr & query, const NameSet & source_columns) void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
{ {
RewriteFunctionToSubcolumnVisitor::Data data{source_columns}; RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot};
RewriteFunctionToSubcolumnVisitor(data).visit(query); RewriteFunctionToSubcolumnVisitor(data).visit(query);
} }
@ -607,8 +607,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (!select_query) if (!select_query)
throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR);
if (result.storage && result.storage->supportsSubcolumns() && settings.optimize_functions_to_subcolumns) if (settings.optimize_functions_to_subcolumns && result.storage
optimizeFunctionsToSubcolumns(query, result.source_columns_set); && result.storage->supportsSubcolumns() && result.metadata_snapshot)
optimizeFunctionsToSubcolumns(query, result.metadata_snapshot);
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);

View File

@ -1,12 +1,30 @@
3 0 1 ['foo','bar'] [1,2] 0 0 1
1 1 0 [] [] 0 1 0
SELECT
isNull(id),
`n.null`,
NOT `n.null`
FROM t_func_to_subcolumns
3 0 1 0
0 1 0 \N
SELECT SELECT
`arr.size0`, `arr.size0`,
`n.null`, `arr.size0` = 0,
NOT `n.null`, `arr.size0` != 0,
empty(n)
FROM t_func_to_subcolumns
['foo','bar'] [1,2]
[] []
SELECT
`m.keys`, `m.keys`,
`m.values` `m.values`
FROM t_func_to_subcolumns FROM t_func_to_subcolumns
1
SELECT sum(NOT `n.null`)
FROM t_func_to_subcolumns
2
SELECT count(id)
FROM t_func_to_subcolumns
1 0 0 1 0 0
2 1 0 2 1 0
3 0 0 3 0 0
@ -25,3 +43,5 @@ ALL FULL OUTER JOIN
3 AS id, 3 AS id,
\'www\' \'www\'
) AS right USING (id) ) AS right USING (id)
0 10
0 20

View File

@ -6,13 +6,37 @@ SET optimize_functions_to_subcolumns = 1;
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
ENGINE = MergeTree ORDER BY tuple(); ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [22], NULL, map()); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map());
SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
EXPLAIN SYNTAX SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
SELECT count(n) FROM t_func_to_subcolumns;
EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns;
SELECT count(id) FROM t_func_to_subcolumns;
EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns;
SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
DROP TABLE t_func_to_subcolumns;
DROP TABLE IF EXISTS t_tuple_null;
CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_tuple_null VALUES ((10)), ((20));
SELECT t IS NULL, t.null FROM t_tuple_null;
DROP TABLE t_tuple_null;