Apply review advice

This commit is contained in:
sundy-li 2020-12-15 16:35:19 +08:00
parent 041f5c8843
commit d72d8ee7d8
6 changed files with 71 additions and 17 deletions

View File

@ -33,7 +33,6 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
auto aa = queryToString(ast);
// If it's select query, only replace filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
@ -64,11 +63,14 @@ void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & da
if (node.name == "lambda")
{
Names local_aliases;
for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node))
auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node);
for (const auto & name : names_from_lambda)
{
if (data.private_aliases.insert(name).second)
{
local_aliases.push_back(name);
}
}
/// visit child with masked local aliases
Visitor(data).visit(node.arguments->children[1]);
for (const auto & name : local_aliases)

View File

@ -51,9 +51,14 @@ public:
struct Data
{
const ColumnsDescription & columns;
/// forbidden_columns come from array join; we can't rewrite alias columns involved in an array join.
/// Do not analyze joined columns.
/// They may have aliases and come to description as is.
const NameSet & forbidden_columns;
const Context & context;
/// private_aliases come from lambdas, so they are local names.
NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)

View File

@ -725,7 +725,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// rewrite filters for select query, must after getArrayJoinedColumns
/// rewrite filters for select query, must go after getArrayJoinedColumns
if (settings.optimize_alias_column_prediction && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);

View File

@ -123,6 +123,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
bool ok;
/// check if it's alias column
/// currently we only support alias columns without any function wrapper,
/// i.e. `order by aliased_column` can have this optimization, but `order by function(aliased_column)` cannot.
/// This suits most cases.
if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name))
{
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();

View File

@ -21,14 +21,54 @@ array-join
lambda
1
optimize_read_in_order
2020-01-01
Expression (Projection)
Limit (preliminary LIMIT)
MergingSorted (Merge sorted streams for ORDER BY)
MergeSorting (Merge sorted blocks for ORDER BY)
PartialSorting (Sort each block for ORDER BY)
Expression (Before ORDER BY and SELECT + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (MergeTree)
Expression (Projection)
Limit (preliminary LIMIT)
FinishSorting
Expression (Before ORDER BY and SELECT + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
Expression (Projection)
Limit (preliminary LIMIT)
FinishSorting
Expression (Before ORDER BY and SELECT)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
optimize_aggregation_in_order
2020-01-01 10
2020-01-02 10
2020-01-03 10
2020-01-01 10
2020-01-02 10
2020-01-03 10
Expression (Projection + Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (MergeTree)
Expression (Projection + Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY + Add table aliases)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
Expression (Projection + Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Union
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
ReadFromStorage (MergeTree with order)
second-index
1
1

View File

@ -69,28 +69,33 @@ SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE d
ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3];
ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array;
SELECT 'array-join';
set max_rows_to_read = 10;
SELECT count() == 10 FROM test_table WHERE day = '2020-01-01';
SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01');
SELECT 'lambda';
-- lambda parameters in a filter should not be rewritten
SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03';
set max_rows_to_read = 0;
-- How to test it? Currently we just check the logs, e.g. 00940_order_by_read_in_order
SELECT 'optimize_read_in_order';
SET optimize_read_in_order = 1;
SELECT day AS s FROM test_table ORDER BY s LIMIT 1;
EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0;
EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1;
EXPLAIN SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1;
SELECT 'optimize_aggregation_in_order';
SET optimize_aggregation_in_order = 1;
SELECT day, count() AS s FROM test_table GROUP BY day;
SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp);
EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0;
EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1;
EXPLAIN SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1;
DROP TABLE test_table;
SELECT 'second-index';
DROP TABLE IF EXISTS test_index;
CREATE TABLE test_index