mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
optimize some functions to subcolumns
This commit is contained in:
parent
13cfedd188
commit
75adbd8e23
@ -1,6 +1,7 @@
|
||||
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -8,25 +9,45 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
ASTPtr transformToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name)
|
||||
ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||
{
|
||||
return std::make_shared<ASTIdentifier>(Nested::concatenateName(identifier.name(), subcolumn_name));
|
||||
return std::make_shared<ASTIdentifier>(Nested::concatenateName(name_in_storage, subcolumn_name));
|
||||
}
|
||||
|
||||
ASTPtr transformIsNotNullToSubcolumn(const ASTIdentifier & identifier, const String & subcolumn_name)
|
||||
ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||
{
|
||||
auto ast = transformToSubcolumn(identifier, subcolumn_name);
|
||||
ast = makeASTFunction("NOT", ast);
|
||||
return ast;
|
||||
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||
return makeASTFunction("equals", ast, std::make_shared<ASTLiteral>(0u));
|
||||
}
|
||||
|
||||
const std::unordered_map<String, std::pair<String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
|
||||
ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||
{
|
||||
{"length", {"size0", transformToSubcolumn}},
|
||||
{"isNull", {"null", transformToSubcolumn}},
|
||||
{"isNotNull", {"null", transformIsNotNullToSubcolumn}},
|
||||
{"mapKeys", {"keys", transformToSubcolumn}},
|
||||
{"mapValues", {"values", transformToSubcolumn}}
|
||||
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||
return makeASTFunction("notEquals", ast, std::make_shared<ASTLiteral>(0u));
|
||||
}
|
||||
|
||||
/// Transformer registered for `isNotNull` in `function_to_subcolumn`.
/// Builds the identifier for `<name_in_storage>.<subcolumn_name>` via
/// transformToSubcolumn and wraps it in the "not" function, so the result
/// is the AST for `not(<subcolumn>)`.
ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||
{
|
||||
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||
return makeASTFunction("not", ast);
|
||||
}
|
||||
|
||||
/// Transformer registered for `count` over a Nullable column in `function_to_subcolumn`.
/// Produces the AST for `sum(not(<name_in_storage>.<subcolumn_name>))`.
/// NOTE(review): presumably the `null` subcolumn is 1 for NULL rows, so the sum
/// equals the number of non-NULL values — confirm against the Nullable subcolumn semantics.
ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
|
||||
{
|
||||
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
|
||||
return makeASTFunction("sum", makeASTFunction("not", ast));
|
||||
}
|
||||
|
||||
/// Rewrite table keyed by function name. Each value is a tuple of:
///   1. the TypeIndex the argument column must have in storage,
///   2. the name of the subcolumn that replaces the function call,
///   3. the transformer that builds the replacement AST from
///      (column name in storage, subcolumn name).
/// The visitor looks a function up here by name and applies the transformer
/// only when the column exists in the storage metadata and its type id
/// matches the first tuple element.
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
|
||||
{
|
||||
{"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
|
||||
{"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},
|
||||
{"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}},
|
||||
{"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}},
|
||||
{"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}},
|
||||
{"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}},
|
||||
{"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}},
|
||||
{"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
|
||||
};
|
||||
|
||||
}
|
||||
@ -38,15 +59,22 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast)
|
||||
return;
|
||||
|
||||
const auto * identifier = arguments[0]->as<ASTIdentifier>();
|
||||
if (!identifier || !columns_to_rewrite.count(identifier->name()))
|
||||
if (!identifier)
|
||||
return;
|
||||
|
||||
auto it = function_to_subcolumn.find(function.name);
|
||||
if (it == function_to_subcolumn.end())
|
||||
return;
|
||||
|
||||
const auto & [subcolumn_name, transformer] = it->second;
|
||||
ast = transformer(*identifier, subcolumn_name);
|
||||
const auto & [type_id, subcolumn_name, transformer] = it->second;
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
const auto & name_in_storage = identifier->name();
|
||||
|
||||
if (columns.has(name_in_storage)
|
||||
&& columns.get(name_in_storage).type->getTypeId() == type_id)
|
||||
{
|
||||
ast = transformer(name_in_storage, subcolumn_name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -14,7 +15,7 @@ public:
|
||||
using TypeToVisit = ASTFunction;
|
||||
void visit(ASTFunction & function, ASTPtr & ast);
|
||||
|
||||
const NameSet & columns_to_rewrite;
|
||||
StorageMetadataPtr metadata_snapshot;
|
||||
};
|
||||
|
||||
using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>;
|
||||
|
@ -581,9 +581,9 @@ void transformIfStringsIntoEnum(ASTPtr & query)
|
||||
ConvertStringsToEnumVisitor(convert_data).visit(query);
|
||||
}
|
||||
|
||||
void optimizeFunctionsToSubcolumns(ASTPtr & query, const NameSet & source_columns)
|
||||
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
RewriteFunctionToSubcolumnVisitor::Data data{source_columns};
|
||||
RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot};
|
||||
RewriteFunctionToSubcolumnVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
@ -607,8 +607,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
||||
if (!select_query)
|
||||
throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (result.storage && result.storage->supportsSubcolumns() && settings.optimize_functions_to_subcolumns)
|
||||
optimizeFunctionsToSubcolumns(query, result.source_columns_set);
|
||||
if (settings.optimize_functions_to_subcolumns && result.storage
|
||||
&& result.storage->supportsSubcolumns() && result.metadata_snapshot)
|
||||
optimizeFunctionsToSubcolumns(query, result.metadata_snapshot);
|
||||
|
||||
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
||||
|
||||
|
@ -1,12 +1,30 @@
|
||||
3 0 1 ['foo','bar'] [1,2]
|
||||
1 1 0 [] []
|
||||
0 0 1
|
||||
0 1 0
|
||||
SELECT
|
||||
isNull(id),
|
||||
`n.null`,
|
||||
NOT `n.null`
|
||||
FROM t_func_to_subcolumns
|
||||
3 0 1 0
|
||||
0 1 0 \N
|
||||
SELECT
|
||||
`arr.size0`,
|
||||
`n.null`,
|
||||
NOT `n.null`,
|
||||
`arr.size0` = 0,
|
||||
`arr.size0` != 0,
|
||||
empty(n)
|
||||
FROM t_func_to_subcolumns
|
||||
['foo','bar'] [1,2]
|
||||
[] []
|
||||
SELECT
|
||||
`m.keys`,
|
||||
`m.values`
|
||||
FROM t_func_to_subcolumns
|
||||
1
|
||||
SELECT sum(NOT `n.null`)
|
||||
FROM t_func_to_subcolumns
|
||||
2
|
||||
SELECT count(id)
|
||||
FROM t_func_to_subcolumns
|
||||
1 0 0
|
||||
2 1 0
|
||||
3 0 0
|
||||
@ -25,3 +43,5 @@ ALL FULL OUTER JOIN
|
||||
3 AS id,
|
||||
\'www\'
|
||||
) AS right USING (id)
|
||||
0 10
|
||||
0 20
|
||||
|
@ -6,13 +6,37 @@ SET optimize_functions_to_subcolumns = 1;
|
||||
CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64))
|
||||
ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [22], NULL, map());
|
||||
INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map());
|
||||
|
||||
SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT length(arr), n IS NULL, n IS NOT NULL, mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||
SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns;
|
||||
|
||||
SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns;
|
||||
|
||||
SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns;
|
||||
|
||||
SELECT count(n) FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns;
|
||||
|
||||
SELECT count(id) FROM t_func_to_subcolumns;
|
||||
EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns;
|
||||
|
||||
SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
||||
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
||||
|
||||
EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left
|
||||
FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id);
|
||||
|
||||
DROP TABLE t_func_to_subcolumns;
|
||||
|
||||
-- Scenario: a non-Nullable Tuple column whose only element is literally named `null`.
-- Selects `t IS NULL` next to the tuple element `t.null` for two inserted rows (10 and 20).
-- NOTE(review): presumably guards against `t IS NULL` being rewritten to the `t.null`
-- subcolumn when the column is not Nullable — confirm against the reference output.
DROP TABLE IF EXISTS t_tuple_null;
|
||||
|
||||
CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO t_tuple_null VALUES ((10)), ((20));
|
||||
|
||||
SELECT t IS NULL, t.null FROM t_tuple_null;
|
||||
|
||||
DROP TABLE t_tuple_null;
|
||||
|
Loading…
Reference in New Issue
Block a user