mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
optimize some functions to subcolumns
This commit is contained in:
parent
13cfedd188
commit
75adbd8e23
@ -1,6 +1,7 @@
|
|||||||
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
|
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
|
||||||
#include <DataTypes/NestedUtils.h>
|
#include <DataTypes/NestedUtils.h>
|
||||||
#include <Parsers/ASTIdentifier.h>
|
#include <Parsers/ASTIdentifier.h>
|
||||||
|
#include <Parsers/ASTLiteral.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -8,25 +9,45 @@ namespace DB
|
|||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
ASTPtr transformToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name)
|
ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||||
{
|
{
|
||||||
return std::make_shared<ASTIdentifier>(Nested::concatenateName(identifier.name(), subcolumn_name));
|
return std::make_shared<ASTIdentifier>(Nested::concatenateName(name_in_storage, subcolumn_name));
|
||||||
}
|
}
|
||||||
|
|
||||||
ASTPtr transformIsNotNullToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name)
|
ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||||
{
|
{
|
||||||
auto ast = transformToSubcolumn(identifier, subcolumn_name);
|
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||||
ast = makeASTFunction("NOT", ast);
|
return makeASTFunction("equals", ast, std::make_shared<ASTLiteral>(0u));
|
||||||
return ast;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::unordered_map<String, std::pair<String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
|
ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||||
{
|
{
|
||||||
{"length", {"size0", transformToSubcolumn}},
|
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||||
{"isNull", {"null", transformToSubcolumn}},
|
return makeASTFunction("notEquals", ast, std::make_shared<ASTLiteral>(0u));
|
||||||
{"isNotNull", {"null", transformIsNotNullToSubcolumn}},
|
}
|
||||||
{"mapKeys", {"keys", transformToSubcolumn}},
|
|
||||||
{"mapValues", {"values", transformToSubcolumn}}
|
ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||||
|
{
|
||||||
|
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||||
|
return makeASTFunction("not", ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||||
|
{
|
||||||
|
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||||
|
return makeASTFunction("sum", makeASTFunction("not", ast));
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
|
||||||
|
{
|
||||||
|
{"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
|
||||||
|
{"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},
|
||||||
|
{"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}},
|
||||||
|
{"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}},
|
||||||
|
{"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}},
|
||||||
|
{"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}},
|
||||||
|
{"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}},
|
||||||
|
{"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -38,15 +59,22 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
const auto * identifier = arguments[0]->as<ASTIdentifier>();
|
const auto * identifier = arguments[0]->as<ASTIdentifier>();
|
||||||
if (!identifier || !columns_to_rewrite.count(identifier->name()))
|
if (!identifier)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
auto it = function_to_subcolumn.find(function.name);
|
auto it = function_to_subcolumn.find(function.name);
|
||||||
if (it == function_to_subcolumn.end())
|
if (it == function_to_subcolumn.end())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const auto & [subcolumn_name, transformer] = it->second;
|
const auto & [type_id, subcolumn_name, transformer] = it->second;
|
||||||
ast = transformer(*identifier, subcolumn_name);
|
const auto & columns = metadata_snapshot->getColumns();
|
||||||
|
const auto & name_in_storage = identifier->name();
|
||||||
|
|
||||||
|
if (columns.has(name_in_storage)
|
||||||
|
&& columns.get(name_in_storage).type->getTypeId() == type_id)
|
||||||
|
{
|
||||||
|
ast = transformer(name_in_storage, subcolumn_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Interpreters/InDepthNodeVisitor.h>
|
#include <Interpreters/InDepthNodeVisitor.h>
|
||||||
|
#include <Storages/StorageInMemoryMetadata.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -14,7 +15,7 @@ public:
|
|||||||
using TypeToVisit = ASTFunction;
|
using TypeToVisit = ASTFunction;
|
||||||
void visit(ASTFunction & function, ASTPtr & ast);
|
void visit(ASTFunction & function, ASTPtr & ast);
|
||||||
|
|
||||||
const NameSet & columns_to_rewrite;
|
StorageMetadataPtr metadata_snapshot;
|
||||||
};
|
};
|
||||||
|
|
||||||
using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>;
|
using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>;
|
||||||
|
@ -581,9 +581,9 @@ void transformIfStringsIntoEnum(ASTPtr & query)
|
|||||||
ConvertStringsToEnumVisitor(convert_data).visit(query);
|
ConvertStringsToEnumVisitor(convert_data).visit(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
void optimizeFunctionsToSubcolumns(ASTPtr & query, const NameSet & source_columns)
|
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
|
||||||
{
|
{
|
||||||
RewriteFunctionToSubcolumnVisitor::Data data{source_columns};
|
RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot};
|
||||||
RewriteFunctionToSubcolumnVisitor(data).visit(query);
|
RewriteFunctionToSubcolumnVisitor(data).visit(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -607,8 +607,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
|||||||
if (!select_query)
|
if (!select_query)
|
||||||
throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR);
|
throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
if (result.storage && result.storage->supportsSubcolumns() && settings.optimize_functions_to_subcolumns)
|
if (settings.optimize_functions_to_subcolumns && result.storage
|
||||||
optimizeFunctionsToSubcolumns(query, result.source_columns_set);
|
&& result.storage->supportsSubcolumns() && result.metadata_snapshot)
|
||||||
|
optimizeFunctionsToSubcolumns(query, result.metadata_snapshot);
|
||||||
|
|
||||||
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
||||||
|
|
||||||
|
@ -1,12 +1,30 @@
|
|||||||
3 0 1 ['foo','bar'] [1,2]
|
0 0 1
|
||||||
1 1 0 [] []
|
0 1 0
|
||||||
|
SELECT
|
||||||
|
isNull(id),
|
||||||
|
`n.null`,
|
||||||
|
NOT `n.null`
|
||||||
|
FROM t_func_to_subcolumns
|
||||||
|
3 0 1 0
|
||||||
|
0 1 0 \N
|
||||||
SELECT
|
SELECT
|
||||||
`arr.size0`,
|
`arr.size0`,
|
||||||
`n.null`,
|
`arr.size0` = 0,
|
||||||
NOT `n.null`,
|
`arr.size0` != 0,
|
||||||
|
empty(n)
|
||||||
|
FROM t_func_to_subcolumns
|
||||||
|
['foo','bar'] [1,2]
|
||||||
|
[] []
|
||||||
|
SELECT
|
||||||
`m.keys`,
|
`m.keys`,
|
||||||
`m.values`
|
`m.values`
|
||||||
FROM t_func_to_subcolumns
|
FROM t_func_to_subcolumns
|
||||||
|
1
|
||||||
|
SELECT sum(NOT `n.null`)
|
||||||
|
FROM t_func_to_subcolumns
|
||||||
|
2
|
||||||
|
SELECT count(id)
|
||||||
|
FROM t_func_to_subcolumns
|
||||||
1 0 0
|
1 0 0
|
||||||
2 1 0
|
2 1 0
|
||||||
3 0 0
|
3 0 0
|
||||||
@ -25,3 +43,5 @@ ALL FULL OUTER JOIN
|
|||||||
3 AS id,
|
3 AS id,
|
||||||
\'www\'
|
\'www\'
|
||||||
) AS right USING (id)
|
) AS right USING (id)
|
||||||
|
0 10
|
||||||
|
0 20
|
||||||
|
@ -6,13 +6,37 @@ SET optimize_functions_to_subcolumns = 1;
|
|||||||
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
|
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
|
||||||
ENGINE = MergeTree ORDER BY tuple();
|
ENGINE = MergeTree ORDER BY tuple();
|
||||||
|
|
||||||
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [22], NULL, map());
|
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map());
|
||||||
|
|
||||||
SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
|
||||||
EXPLAIN SYNTAX SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
|
||||||
|
|
||||||
|
SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
|
||||||
|
EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
|
||||||
|
|
||||||
|
SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||||
|
EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||||
|
|
||||||
|
SELECT count(n) FROM t_func_to_subcolumns;
|
||||||
|
EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns;
|
||||||
|
|
||||||
|
SELECT count(id) FROM t_func_to_subcolumns;
|
||||||
|
EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns;
|
||||||
|
|
||||||
SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
||||||
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
||||||
|
|
||||||
EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
||||||
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
||||||
|
|
||||||
|
DROP TABLE t_func_to_subcolumns;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS t_tuple_null;
|
||||||
|
|
||||||
|
CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple();
|
||||||
|
|
||||||
|
INSERT INTO t_tuple_null VALUES ((10)), ((20));
|
||||||
|
|
||||||
|
SELECT t IS NULL, t.null FROM t_tuple_null;
|
||||||
|
|
||||||
|
DROP TABLE t_tuple_null;
|
||||||
|
Loading…
Reference in New Issue
Block a user