From ad99dab3adb524fbdfc8b70801f74356352f1fcb Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 14 Nov 2020 14:01:34 +0800 Subject: [PATCH 01/78] add ColumnAliasesVisitor to improve partition prune for mergetree --- src/Interpreters/ColumnAliasesVisitor.cpp | 50 +++++++++++++ src/Interpreters/ColumnAliasesVisitor.h | 37 ++++++++++ src/Interpreters/InterpreterSelectQuery.cpp | 8 ++- src/Interpreters/ya.make | 1 + src/Storages/MergeTree/KeyCondition.cpp | 11 ++- src/Storages/MergeTree/KeyCondition.h | 3 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +-- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.h | 2 +- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.h | 2 +- .../MergeTree/MergeTreeIndexMinMax.cpp | 7 +- src/Storages/MergeTree/MergeTreeIndexMinMax.h | 3 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.h | 2 +- src/Storages/MergeTree/MergeTreeIndices.h | 2 +- src/Storages/MergeTree/PartitionPruner.h | 5 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- .../01576_alias_column_rewrite.reference | 17 +++++ .../01576_alias_column_rewrite.sql | 70 +++++++++++++++++++ 21 files changed, 215 insertions(+), 23 deletions(-) create mode 100644 src/Interpreters/ColumnAliasesVisitor.cpp create mode 100644 src/Interpreters/ColumnAliasesVisitor.h create mode 100644 tests/queries/0_stateless/01576_alias_column_rewrite.reference create mode 100644 tests/queries/0_stateless/01576_alias_column_rewrite.sql diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp new file mode 100644 index 00000000000..48fcc44eac7 --- /dev/null +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool 
ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) +{ + return !(node->as() + || node->as() + || node->as() + || node->as()); +} + +void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * node = ast->as()) + { + if (auto column_name = IdentifierSemantic::getColumnName(*node)) + { + if (const auto column_default = data.columns.getDefault(column_name.value())) + { + if (column_default.value().kind == ColumnDefaultKind::Alias) + { + const auto alias_columns = data.columns.getAliases(); + for (const auto & alias_column : alias_columns) + { + if (alias_column.name == column_name.value()) + { + ast = addTypeConversionToAST(column_default.value().expression->clone(), alias_column.type->getName()); + //revisit ast to track recursive alias columns + Visitor(data).visit(ast); + break; + } + } + } + } + } + } +} + +} diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h new file mode 100644 index 00000000000..af6f09994e2 --- /dev/null +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class IAST; +using ASTPtr = std::shared_ptr; +class IDataType; +using DataTypePtr = std::shared_ptr; + +/// Visits AST node to rewrite alias columns in filter query +/// Currently works only in `KeyCondition` of select query +class ColumnAliasesMatcher +{ +public: + using Visitor = InDepthNodeVisitor; + + struct Data + { + const ColumnsDescription & columns; + + Data(const ColumnsDescription & columns_) + : columns(columns_) + {} + }; + + static void visit(ASTPtr & ast, Data & data); + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); +}; + +using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor; + +} diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2eee269efe1..6a4561fb113 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectQuery.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1291,8 +1292,11 @@ void InterpreterSelectQuery::executeFetchColumns( { auto column_decl = storage_columns.get(column); /// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer). - auto cast_column_default = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName()); - column_expr = setAlias(cast_column_default->clone(), column); + column_expr = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName()); + // recursive visit for alias to alias + ColumnAliasesVisitor::Data data(storage_columns); + ColumnAliasesVisitor(data).visit(column_expr); + column_expr = setAlias(column_expr, column); } else column_expr = std::make_shared(column); diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 02e67116233..e2e2f1c3543 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -34,6 +34,7 @@ SRCS( ClusterProxy/SelectStreamFactory.cpp ClusterProxy/executeQuery.cpp CollectJoinOnKeysVisitor.cpp + ColumnAliasesVisitor.cpp Context.cpp CrashLog.cpp CrossToInnerJoinVisitor.cpp diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7b2044ef765..7d411f41172 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -371,11 +372,12 @@ Block KeyCondition::getBlockWithConstants( KeyCondition::KeyCondition( const SelectQueryInfo & query_info, const Context & context, + const ColumnsDescription & columns_desc_, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, bool single_point_, bool strict_) - : key_expr(key_expr_), prepared_sets(query_info.sets), single_point(single_point_), strict(strict_) + : key_expr(key_expr_), prepared_sets(query_info.sets), 
columns_desc(columns_desc_), single_point(single_point_), strict(strict_) { for (size_t i = 0, size = key_column_names.size(); i < size; ++i) { @@ -405,7 +407,12 @@ KeyCondition::KeyCondition( * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's * are pushed down and applied (when possible) to leaf nodes. */ - traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants); + + auto cloned_query = cloneASTWithInversionPushDown(filter_query); + ColumnAliasesVisitor::Data data{columns_desc}; + ColumnAliasesVisitor(data).visit(cloned_query); + + traverseAST(cloned_query, context, block_with_constants); } else { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index aa8a49226ba..b8d37b8c96a 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -230,6 +231,7 @@ public: KeyCondition( const SelectQueryInfo & query_info, const Context & context, + const ColumnsDescription & columns_desc_, const Names & key_column_names, const ExpressionActionsPtr & key_expr, bool single_point_ = false, @@ -418,6 +420,7 @@ private: ColumnIndices key_columns; ExpressionActionsPtr key_expr; PreparedSets prepared_sets; + const ColumnsDescription & columns_desc; // If true, always allow key_expr to be wrapped by function bool single_point; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 98a08abab65..28e8b25ce87 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -209,7 +209,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const auto & primary_key = metadata_snapshot->getPrimaryKey(); Names primary_key_columns = primary_key.column_names; - KeyCondition key_condition(query_info, context, 
primary_key_columns, primary_key.expression); + KeyCondition key_condition(query_info, context, metadata_snapshot->getColumns(), primary_key_columns, primary_key.expression); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { @@ -221,8 +221,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::optional partition_pruner; if (data.minmax_idx_expr) { - minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); - partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); + minmax_idx_condition.emplace(query_info, context, metadata_snapshot->getColumns(), data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, false /* strict */); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { @@ -559,7 +559,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( for (const auto & index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = MergeTreeIndexFactory::instance().get(index); - auto condition = index_helper->createIndexCondition(query_info, context); + auto condition = index_helper->createIndexCondition(query_info, metadata_snapshot->getColumns(), context); if (!condition->alwaysUnknownOrTrue()) useful_indices.emplace_back(index_helper, condition); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index a98ba16978d..ba5486c4749 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -67,7 +67,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() c return std::make_shared(bits_per_row, hash_functions, index.column_names); } -MergeTreeIndexConditionPtr 
MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const ColumnsDescription & /*columns*/, const Context & context) const { return std::make_shared(query_info, context, index.sample_block, hash_functions); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index b0d9a295bcd..f2bb7ad24c8 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -20,7 +20,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const ColumnsDescription & columns, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 99629144680..839d5c29b6e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -562,7 +562,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() cons } MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( - const SelectQueryInfo & query, const Context & context) const + const SelectQueryInfo & query, const ColumnsDescription & /*columns*/, const Context & context) const { return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index c3c1ff8de8b..d6b5953cb60 100644 --- 
a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -199,7 +199,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const Context & context) const override; + const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index de89a27ab46..1ea19262db7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -132,9 +132,10 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( const IndexDescription & index, const SelectQueryInfo & query, + const ColumnsDescription & columns, const Context & context) : index_data_types(index.data_types) - , condition(query, context, index.column_names, index.expression) + , condition(query, context, columns, index.column_names, index.expression) { } @@ -169,9 +170,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const } MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( - const SelectQueryInfo & query, const Context & context) const + const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const { - return std::make_shared(index, query, context); + return std::make_shared(index, query, columns, context); }; bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 3956b1d9f9a..24df4d50ff9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ 
b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -52,6 +52,7 @@ public: MergeTreeIndexConditionMinMax( const IndexDescription & index, const SelectQueryInfo & query, + const ColumnsDescription & columns, const Context & context); bool alwaysUnknownOrTrue() const override; @@ -78,7 +79,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const Context & context) const override; + const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 9aaf894a0cb..2a73cb7dfe6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -468,7 +468,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator() const } MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( - const SelectQueryInfo & query, const Context & context) const + const SelectQueryInfo & query, const ColumnsDescription & /*columns*/, const Context & context) const { return std::make_shared(index.name, index.sample_block, max_rows, query, context); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index d84991f5e85..ed02d971e77 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -129,7 +129,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const Context & context) const override; + const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git 
a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 28795ae46b5..32b26e9a280 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -85,7 +85,7 @@ struct IMergeTreeIndex virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0; virtual MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query_info, const Context & context) const = 0; + const SelectQueryInfo & query_info, const ColumnsDescription & columns, const Context & context) const = 0; Names getColumnsRequiredForIndexCalc() const { return index.expression->getRequiredColumns(); } diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 74b02d671bb..83048363061 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -21,10 +22,10 @@ private: using DataPartPtr = std::shared_ptr; public: - PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context, bool strict) + PartitionPruner(const KeyDescription & partition_key_, const ColumnsDescription & columns, const SelectQueryInfo & query_info, const Context & context, bool strict) : partition_key(partition_key_) , partition_condition( - query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict) + query_info, context, columns, partition_key.column_names, partition_key.expression, true /* single_point */, strict) , useless(partition_condition.alwaysUnknownOrTrue()) { } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4d4b35ce7c4..e762db77614 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -210,7 +210,7 @@ std::optional StorageMergeTree::totalRows() const std::optional 
StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f41515331f5..a4d8632f12c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3730,7 +3730,7 @@ std::optional StorageReplicatedMergeTree::totalRows() const std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference new file mode 100644 index 00000000000..1531fc64f8b --- /dev/null +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -0,0 +1,17 @@ +test-partition-prune +1 +1 +1 +1 +1 +test-join +1 +1 +alias2alias +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql new file mode 100644 index 00000000000..8ff23f3f2b0 --- /dev/null +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -0,0 +1,70 @@ +DROP TABLE IF 
EXISTS table_with_alias_column; +CREATE TABLE table_with_alias_column +( + `timestamp` DateTime, + `value` UInt64, + `day` Date ALIAS toDate(timestamp), + `day1` Date ALIAS day + 1, + `day2` Date ALIAS day1 + 1, + `time` DateTime ALIAS timestamp +) +ENGINE = MergeTree +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY timestamp; + + +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(1000); + +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(1000); + +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(1000); + + +SELECT 'test-partition-prune'; + +SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 1000; +SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); +SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 1001; +SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day, day as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); + +SELECT 'test-join'; + +SELECT day = '2020-01-03' +FROM +( + SELECT toDate('2020-01-03') AS day + FROM numbers(1) +) AS a +INNER JOIN +( + SELECT day + FROM table_with_alias_column + WHERE day = '2020-01-03' + GROUP BY day SETTINGS max_rows_to_read = 1000 +) AS b ON a.day = b.day; + +SELECT day = '2020-01-01' +FROM +( + SELECT day + FROM table_with_alias_column + WHERE day = '2020-01-01' + GROUP BY day SETTINGS max_rows_to_read = 1001 +) AS a +INNER JOIN +( + SELECT toDate('2020-01-01') AS day + FROM numbers(1) +) AS b ON a.day = b.day; + + +SELECT 'alias2alias'; +SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 1000; +SELECT t = 
'2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); +SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); +SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day1 = '2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 1001; +SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day1, day1 as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); +SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 1000; + +DROP TABLE table_with_alias_column; + From 11907eca8f3839a5fe13d690282d2679d4e06f25 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 21 Nov 2020 21:29:51 +0800 Subject: [PATCH 02/78] rewrite in excutor --- src/Storages/MergeTree/KeyCondition.cpp | 11 ++-------- src/Storages/MergeTree/KeyCondition.h | 3 --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 20 +++++++++++++------ .../MergeTree/MergeTreeIndexBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.h | 2 +- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.h | 2 +- .../MergeTree/MergeTreeIndexMinMax.cpp | 7 +++---- src/Storages/MergeTree/MergeTreeIndexMinMax.h | 3 +-- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.h | 2 +- src/Storages/MergeTree/MergeTreeIndices.h | 2 +- src/Storages/MergeTree/PartitionPruner.h | 5 ++--- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 15 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7d411f41172..7b2044ef765 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ 
b/src/Storages/MergeTree/KeyCondition.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -372,12 +371,11 @@ Block KeyCondition::getBlockWithConstants( KeyCondition::KeyCondition( const SelectQueryInfo & query_info, const Context & context, - const ColumnsDescription & columns_desc_, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, bool single_point_, bool strict_) - : key_expr(key_expr_), prepared_sets(query_info.sets), columns_desc(columns_desc_), single_point(single_point_), strict(strict_) + : key_expr(key_expr_), prepared_sets(query_info.sets), single_point(single_point_), strict(strict_) { for (size_t i = 0, size = key_column_names.size(); i < size; ++i) { @@ -407,12 +405,7 @@ KeyCondition::KeyCondition( * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's * are pushed down and applied (when possible) to leaf nodes. */ - - auto cloned_query = cloneASTWithInversionPushDown(filter_query); - ColumnAliasesVisitor::Data data{columns_desc}; - ColumnAliasesVisitor(data).visit(cloned_query); - - traverseAST(cloned_query, context, block_with_constants); + traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants); } else { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index b8d37b8c96a..aa8a49226ba 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB @@ -231,7 +230,6 @@ public: KeyCondition( const SelectQueryInfo & query_info, const Context & context, - const ColumnsDescription & columns_desc_, const Names & key_column_names, const ExpressionActionsPtr & key_expr, bool single_point_ = false, @@ -420,7 +418,6 @@ private: ColumnIndices key_columns; ExpressionActionsPtr key_expr; PreparedSets prepared_sets; - const ColumnsDescription & columns_desc; // If true, always 
allow key_expr to be wrapped by function bool single_point; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 28e8b25ce87..9a9040d0952 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -209,7 +210,16 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const auto & primary_key = metadata_snapshot->getPrimaryKey(); Names primary_key_columns = primary_key.column_names; - KeyCondition key_condition(query_info, context, metadata_snapshot->getColumns(), primary_key_columns, primary_key.expression); + // rewrite query_info.query by ColumnAliasesVisitor before applying indexes + auto & select = query_info.query->as(); + ColumnAliasesVisitor::Data aliase_column_data(metadata_snapshot->getColumns()); + ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); + if (select.where()) + aliase_column_visitor.visit(select.refWhere()); + if (select.prewhere()) + aliase_column_visitor.visit(select.refPrewhere()); + + KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { @@ -221,8 +231,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::optional partition_pruner; if (data.minmax_idx_expr) { - minmax_idx_condition.emplace(query_info, context, metadata_snapshot->getColumns(), data.minmax_idx_columns, data.minmax_idx_expr); - partition_pruner.emplace(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, false /* strict */); + minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); if (settings.force_index_by_date && 
(minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { @@ -285,8 +295,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( RelativeSize relative_sample_size = 0; RelativeSize relative_sample_offset = 0; - const auto & select = query_info.query->as(); - auto select_sample_size = select.sampleSize(); auto select_sample_offset = select.sampleOffset(); @@ -559,7 +567,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( for (const auto & index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = MergeTreeIndexFactory::instance().get(index); - auto condition = index_helper->createIndexCondition(query_info, metadata_snapshot->getColumns(), context); + auto condition = index_helper->createIndexCondition(query_info, context); if (!condition->alwaysUnknownOrTrue()) useful_indices.emplace_back(index_helper, condition); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index ba5486c4749..a98ba16978d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -67,7 +67,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() c return std::make_shared(bits_per_row, hash_functions, index.column_names); } -MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const ColumnsDescription & /*columns*/, const Context & context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const { return std::make_shared(query_info, context, index.sample_block, hash_functions); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index f2bb7ad24c8..b0d9a295bcd 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ 
b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -20,7 +20,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const ColumnsDescription & columns, const Context & context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 839d5c29b6e..99629144680 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -562,7 +562,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() cons } MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & /*columns*/, const Context & context) const + const SelectQueryInfo & query, const Context & context) const { return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index d6b5953cb60..c3c1ff8de8b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -199,7 +199,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; + const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 
1ea19262db7..de89a27ab46 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -132,10 +132,9 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( const IndexDescription & index, const SelectQueryInfo & query, - const ColumnsDescription & columns, const Context & context) : index_data_types(index.data_types) - , condition(query, context, columns, index.column_names, index.expression) + , condition(query, context, index.column_names, index.expression) { } @@ -170,9 +169,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const } MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const + const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(index, query, columns, context); + return std::make_shared(index, query, context); }; bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 24df4d50ff9..3956b1d9f9a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -52,7 +52,6 @@ public: MergeTreeIndexConditionMinMax( const IndexDescription & index, const SelectQueryInfo & query, - const ColumnsDescription & columns, const Context & context); bool alwaysUnknownOrTrue() const override; @@ -79,7 +78,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; + const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) 
const override; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 2a73cb7dfe6..9aaf894a0cb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -468,7 +468,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator() const } MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & /*columns*/, const Context & context) const + const SelectQueryInfo & query, const Context & context) const { return std::make_shared(index.name, index.sample_block, max_rows, query, context); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index ed02d971e77..d84991f5e85 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -129,7 +129,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const ColumnsDescription & columns, const Context & context) const override; + const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 32b26e9a280..28795ae46b5 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -85,7 +85,7 @@ struct IMergeTreeIndex virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0; virtual MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query_info, const ColumnsDescription & columns, const Context & context) const = 0; + const SelectQueryInfo & query_info, const Context & context) const = 0; Names getColumnsRequiredForIndexCalc() const { return index.expression->getRequiredColumns(); 
} diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 83048363061..74b02d671bb 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -22,10 +21,10 @@ private: using DataPartPtr = std::shared_ptr; public: - PartitionPruner(const KeyDescription & partition_key_, const ColumnsDescription & columns, const SelectQueryInfo & query_info, const Context & context, bool strict) + PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context, bool strict) : partition_key(partition_key_) , partition_condition( - query_info, context, columns, partition_key.column_names, partition_key.expression, true /* single_point */, strict) + query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict) , useless(partition_condition.alwaysUnknownOrTrue()) { } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e762db77614..4d4b35ce7c4 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -210,7 +210,7 @@ std::optional StorageMergeTree::totalRows() const std::optional StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a4d8632f12c..f41515331f5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ 
b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3730,7 +3730,7 @@ std::optional StorageReplicatedMergeTree::totalRows() const std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), metadata_snapshot->getColumns(), query_info, context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; From 128785e187620b0cf2f3ec22ecbec18fda72302d Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 21 Nov 2020 22:31:31 +0800 Subject: [PATCH 03/78] do not pass columns into indexes --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9a9040d0952..c9b1c9fd540 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -210,16 +210,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const auto & primary_key = metadata_snapshot->getPrimaryKey(); Names primary_key_columns = primary_key.column_names; - // rewrite query_info.query by ColumnAliasesVisitor before applying indexes - auto & select = query_info.query->as(); + // query_info_for_index is a cloned SelectQueryInfo just for index + SelectQueryInfo query_info_for_index; + query_info_for_index.query = query_info.query->clone(); + query_info_for_index.syntax_analyzer_result = query_info.syntax_analyzer_result; + query_info_for_index.sets = query_info.sets; + + auto & temp_select = query_info_for_index.query->as(); ColumnAliasesVisitor::Data 
aliase_column_data(metadata_snapshot->getColumns()); ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); - if (select.where()) - aliase_column_visitor.visit(select.refWhere()); - if (select.prewhere()) - aliase_column_visitor.visit(select.refPrewhere()); + if (temp_select.where()) + aliase_column_visitor.visit(temp_select.refWhere()); + if (temp_select.prewhere()) + aliase_column_visitor.visit(temp_select.refPrewhere()); - KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression); + KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { @@ -231,8 +236,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::optional partition_pruner; if (data.minmax_idx_expr) { - minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); - partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); + minmax_idx_condition.emplace(query_info_for_index, context, data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info_for_index, context, false /* strict */); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { @@ -295,6 +300,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( RelativeSize relative_sample_size = 0; RelativeSize relative_sample_offset = 0; + const auto & select = query_info.query->as(); auto select_sample_size = select.sampleSize(); auto select_sample_offset = select.sampleOffset(); From f4808df41e98de3f92fa0be3d62e1fb423cdf63e Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 21 Nov 2020 23:04:39 +0800 Subject: [PATCH 04/78] add tests for second indexes --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 26 +++++++++---------- 
.../01576_alias_column_rewrite.reference | 3 +++ .../01576_alias_column_rewrite.sql | 19 ++++++++++++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c9b1c9fd540..ea9ece5fd4f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -211,18 +211,18 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( Names primary_key_columns = primary_key.column_names; // query_info_for_index is a cloned SelectQueryInfo just for index - SelectQueryInfo query_info_for_index; - query_info_for_index.query = query_info.query->clone(); - query_info_for_index.syntax_analyzer_result = query_info.syntax_analyzer_result; - query_info_for_index.sets = query_info.sets; - - auto & temp_select = query_info_for_index.query->as(); - ColumnAliasesVisitor::Data aliase_column_data(metadata_snapshot->getColumns()); - ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); - if (temp_select.where()) - aliase_column_visitor.visit(temp_select.refWhere()); - if (temp_select.prewhere()) - aliase_column_visitor.visit(temp_select.refPrewhere()); + SelectQueryInfo query_info_for_index = query_info; + if (!metadata_snapshot->getColumns().getAliases().empty()) + { + query_info_for_index.query = query_info.query->clone(); + auto & temp_select = query_info_for_index.query->as(); + ColumnAliasesVisitor::Data aliase_column_data(metadata_snapshot->getColumns()); + ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); + if (temp_select.where()) + aliase_column_visitor.visit(temp_select.refWhere()); + if (temp_select.prewhere()) + aliase_column_visitor.visit(temp_select.refPrewhere()); + } KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression); @@ -573,7 +573,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( for (const auto & 
index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = MergeTreeIndexFactory::instance().get(index); - auto condition = index_helper->createIndexCondition(query_info, context); + auto condition = index_helper->createIndexCondition(query_info_for_index, context); if (!condition->alwaysUnknownOrTrue()) useful_indices.emplace_back(index_helper, condition); } diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 1531fc64f8b..18600e6829a 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -15,3 +15,6 @@ alias2alias 1 1 1 +second_index +1 +1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 8ff23f3f2b0..395f26c98d1 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -68,3 +68,22 @@ SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01 DROP TABLE table_with_alias_column; + +SELECT 'second_index'; + +DROP TABLE IF EXISTS test_index; +CREATE TABLE test_index +( + `key_string` String, + `key_uint32` ALIAS toUInt32(key_string), + INDEX idx toUInt32(key_string) TYPE set(0) GRANULARITY 1 +) +ENGINE = MergeTree +PARTITION BY tuple() +PRIMARY KEY tuple() +ORDER BY key_string SETTINGS index_granularity = 1; + +INSERT INTO test_index SELECT * FROM numbers(10); +SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 1; +SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 1; +DROP TABLE IF EXISTS test_index; From fe8b11fd5f25d88f2e6933ecefe485a533d03bf9 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 22 Nov 2020 12:16:27 +0800 Subject: [PATCH 05/78] add replaceAliasColumnsInFilter function --- 
src/Interpreters/ColumnAliasesVisitor.cpp | 8 +++---- src/Interpreters/InterpreterSelectQuery.cpp | 3 ++- .../replaceAliasColumnsInFilter.cpp | 22 +++++++++++++++++++ .../replaceAliasColumnsInFilter.h | 12 ++++++++++ src/Interpreters/ya.make | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 10 ++------- 6 files changed, 43 insertions(+), 13 deletions(-) create mode 100644 src/Interpreters/replaceAliasColumnsInFilter.cpp create mode 100644 src/Interpreters/replaceAliasColumnsInFilter.h diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index 48fcc44eac7..0028f4da25c 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -26,16 +26,16 @@ void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) { if (auto column_name = IdentifierSemantic::getColumnName(*node)) { - if (const auto column_default = data.columns.getDefault(column_name.value())) + if (const auto column_default = data.columns.getDefault(*column_name)) { - if (column_default.value().kind == ColumnDefaultKind::Alias) + if (column_default->kind == ColumnDefaultKind::Alias) { const auto alias_columns = data.columns.getAliases(); for (const auto & alias_column : alias_columns) { - if (alias_column.name == column_name.value()) + if (alias_column.name == *column_name) { - ast = addTypeConversionToAST(column_default.value().expression->clone(), alias_column.type->getName()); + ast = addTypeConversionToAST(column_default->expression->clone(), alias_column.type->getName()); //revisit ast to track recursive alias columns Visitor(data).visit(ast); break; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6a4561fb113..0f36a9d64ad 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1182,7 +1183,7 @@ void 
InterpreterSelectQuery::executeFetchColumns( else // It's possible to optimize count() given only partition predicates { SelectQueryInfo temp_query_info; - temp_query_info.query = query_ptr; + temp_query_info.query = replaceAliasColumnsInFilter(query_ptr->clone(), storage->getInMemoryMetadata().getColumns()); temp_query_info.syntax_analyzer_result = syntax_analyzer_result; temp_query_info.sets = query_analyzer->getPreparedSets(); num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context); diff --git a/src/Interpreters/replaceAliasColumnsInFilter.cpp b/src/Interpreters/replaceAliasColumnsInFilter.cpp new file mode 100644 index 00000000000..7d113be88c7 --- /dev/null +++ b/src/Interpreters/replaceAliasColumnsInFilter.cpp @@ -0,0 +1,22 @@ +#include +#include +#include +#include + +namespace DB +{ + +ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns) +{ + auto & temp_select = ast->as(); + ColumnAliasesVisitor::Data aliase_column_data(columns); + ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); + if (temp_select.where()) + aliase_column_visitor.visit(temp_select.refWhere()); + if (temp_select.prewhere()) + aliase_column_visitor.visit(temp_select.refPrewhere()); + + return ast; +} + +} diff --git a/src/Interpreters/replaceAliasColumnsInFilter.h b/src/Interpreters/replaceAliasColumnsInFilter.h new file mode 100644 index 00000000000..c7599ad1725 --- /dev/null +++ b/src/Interpreters/replaceAliasColumnsInFilter.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ColumnsDescription; +ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns); + +} diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index e2e2f1c3543..0b88527ace4 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -153,6 +153,7 @@ SRCS( interpretSubquery.cpp join_common.cpp loadMetadata.cpp + replaceAliasColumnsInFilter.cpp sortBlock.cpp ) diff --git 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ea9ece5fd4f..0d7e29cffd5 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -214,14 +215,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( SelectQueryInfo query_info_for_index = query_info; if (!metadata_snapshot->getColumns().getAliases().empty()) { - query_info_for_index.query = query_info.query->clone(); - auto & temp_select = query_info_for_index.query->as(); - ColumnAliasesVisitor::Data aliase_column_data(metadata_snapshot->getColumns()); - ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); - if (temp_select.where()) - aliase_column_visitor.visit(temp_select.refWhere()); - if (temp_select.prewhere()) - aliase_column_visitor.visit(temp_select.refPrewhere()); + query_info_for_index.query = replaceAliasColumnsInFilter(query_info.query->clone(), metadata_snapshot->getColumns()); } KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression); From bcf07d19107a937a244bd8894e14fccff02ddb22 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 22 Nov 2020 13:11:29 +0800 Subject: [PATCH 06/78] update --- src/Interpreters/replaceAliasColumnsInFilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/replaceAliasColumnsInFilter.cpp b/src/Interpreters/replaceAliasColumnsInFilter.cpp index 7d113be88c7..db1abe38cdd 100644 --- a/src/Interpreters/replaceAliasColumnsInFilter.cpp +++ b/src/Interpreters/replaceAliasColumnsInFilter.cpp @@ -16,7 +16,7 @@ ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & col if (temp_select.prewhere()) aliase_column_visitor.visit(temp_select.refPrewhere()); - return ast; + return std::move(ast); } } From 
20f0d396456d37345616a72b1fa8fce36a000584 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 22 Nov 2020 17:52:02 +0800 Subject: [PATCH 07/78] update tests --- .../01576_alias_column_rewrite.sql | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 395f26c98d1..c91bf4f6d29 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -10,21 +10,21 @@ CREATE TABLE table_with_alias_column ) ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp) -ORDER BY timestamp; +ORDER BY timestamp SETTINGS index_granularity = 1; -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(1000); +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10); -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(1000); +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(1000); +INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); SELECT 'test-partition-prune'; -SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 1000; -SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); -SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 1001; +SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS 
max_rows_to_read = 10; +SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11; SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day, day as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); SELECT 'test-join'; @@ -40,7 +40,7 @@ INNER JOIN SELECT day FROM table_with_alias_column WHERE day = '2020-01-03' - GROUP BY day SETTINGS max_rows_to_read = 1000 + GROUP BY day SETTINGS max_rows_to_read = 11 ) AS b ON a.day = b.day; SELECT day = '2020-01-01' @@ -49,7 +49,7 @@ FROM SELECT day FROM table_with_alias_column WHERE day = '2020-01-01' - GROUP BY day SETTINGS max_rows_to_read = 1001 + GROUP BY day SETTINGS max_rows_to_read = 11 ) AS a INNER JOIN ( @@ -59,12 +59,12 @@ INNER JOIN SELECT 'alias2alias'; -SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 1000; -SELECT t = '2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); -SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 1000); -SELECT COUNT() = 1000 FROM table_with_alias_column WHERE day1 = '2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 1001; +SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10; +SELECT t = '2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = 
'2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11; SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day1, day1 as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); -SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 1000; +SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10; DROP TABLE table_with_alias_column; @@ -84,6 +84,6 @@ PRIMARY KEY tuple() ORDER BY key_string SETTINGS index_granularity = 1; INSERT INTO test_index SELECT * FROM numbers(10); -SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 1; -SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 1; +SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 10; +SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 10; DROP TABLE IF EXISTS test_index; From 7e51120496014b740d0eaaa85a0acd25e3a98659 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Sun, 29 Nov 2020 08:34:22 +0800 Subject: [PATCH 08/78] Trigger tests again --- src/Interpreters/ColumnAliasesVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index af6f09994e2..ff2e4df9a9e 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -13,7 +13,7 @@ class IDataType; using DataTypePtr = std::shared_ptr; /// Visits AST node to rewrite alias columns in filter query -/// Currently works only in `KeyCondition` of select query +/// Currently works only in `KeyCondition` of select query and `required_columns` in `InterpreterSelectQuery.cpp` class ColumnAliasesMatcher { 
public: From 66c7f611e2f6a1d5bb3261cdad4982aa255acb58 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sat, 5 Dec 2020 04:19:29 +0300 Subject: [PATCH 09/78] Syntax fixed --- docs/en/sql-reference/statements/insert-into.md | 2 +- docs/ru/sql-reference/statements/insert-into.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index ae5e074fd15..25d169e9452 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax. +You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 0d38be81ac6..212f1695a06 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `COLUMNS(c1,c2,c3)`. +Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)`. Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`. 
From c8e1f72f22ae956143c4c63dea7150593920d490 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Fri, 11 Dec 2020 08:50:08 +0300 Subject: [PATCH 10/78] Fixed --- docs/en/sql-reference/statements/insert-into.md | 2 +- docs/ru/sql-reference/statements/insert-into.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 25d169e9452..e55c10a5211 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. +You can specify a list of columns to insert using the `(c1, c2, c3)` or `(COLUMNS(c1, c2, c3))` syntax. Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 212f1695a06..690fbcf38c9 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)`. +Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `(COLUMNS(c1, c2, c3))`. Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`. 
From 94070285408f3af50287517f3b5e1f0d52d70c63 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 13 Dec 2020 00:42:15 +0800 Subject: [PATCH 11/78] update --- src/Core/Settings.h | 1 + src/Interpreters/ColumnAliasesVisitor.cpp | 82 ++++++++--- src/Interpreters/ColumnAliasesVisitor.h | 16 ++- src/Interpreters/InterpreterSelectQuery.cpp | 15 +- src/Interpreters/TreeRewriter.cpp | 21 ++- src/Interpreters/TreeRewriter.h | 1 + src/Interpreters/addTypeConversionToAST.cpp | 30 +++- src/Interpreters/addTypeConversionToAST.h | 4 + .../replaceAliasColumnsInFilter.cpp | 22 --- .../replaceAliasColumnsInFilter.h | 12 -- .../replaceAliasColumnsInQuery.cpp | 16 +++ src/Interpreters/replaceAliasColumnsInQuery.h | 14 ++ src/Interpreters/ya.make | 2 +- src/Parsers/ASTSelectQuery.h | 2 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 18 +-- src/Storages/ReadInOrderOptimizer.cpp | 132 +++++++++++------- src/Storages/ReadInOrderOptimizer.h | 5 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMerge.cpp | 2 +- .../01576_alias_column_rewrite.reference | 16 ++- .../01576_alias_column_rewrite.sql | 83 +++++++---- 22 files changed, 334 insertions(+), 164 deletions(-) delete mode 100644 src/Interpreters/replaceAliasColumnsInFilter.cpp delete mode 100644 src/Interpreters/replaceAliasColumnsInFilter.h create mode 100644 src/Interpreters/replaceAliasColumnsInQuery.cpp create mode 100644 src/Interpreters/replaceAliasColumnsInQuery.h diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e7141677f78..dc04ae07363 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -364,6 +364,7 @@ class IColumn; M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing 
large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ + M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will rewrite the filter query with aliased columns, this could help with partition prune and secondary indexes. And also help with optimize_aggregation_in_order and optimize_read_in_order", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index 0028f4da25c..43462f2f9bf 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,43 +9,90 @@ #include #include #include +#include +#include namespace DB { bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) { + if (const auto * f = node->as()) + { + /// "lambda" visit children itself. + if (f->name == "lambda") + return false; + } + return !(node->as() || node->as() || node->as() + || node->as() || node->as()); } void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) { + auto aa = queryToString(ast); + // If it's select query, only replace filters. 
+ if (auto * query = ast->as()) + { + if (query->where()) + Visitor(data).visit(query->refWhere()); + if (query->prewhere()) + Visitor(data).visit(query->refPrewhere()); + + return; + } + + if (auto * node = ast->as()) + { + visit(*node, ast, data); + return; + } + if (auto * node = ast->as()) { - if (auto column_name = IdentifierSemantic::getColumnName(*node)) - { - if (const auto column_default = data.columns.getDefault(*column_name)) + visit(*node, ast, data); + return; + } +} + +void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data) +{ + /// Do not add formal parameters of the lambda expression + if (node.name == "lambda") + { + Names local_aliases; + for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node)) + if (data.private_aliases.insert(name).second) { - if (column_default->kind == ColumnDefaultKind::Alias) - { - const auto alias_columns = data.columns.getAliases(); - for (const auto & alias_column : alias_columns) - { - if (alias_column.name == *column_name) - { - ast = addTypeConversionToAST(column_default->expression->clone(), alias_column.type->getName()); - //revisit ast to track recursive alias columns - Visitor(data).visit(ast); - break; - } - } - } + local_aliases.push_back(name); } + /// visit child with masked local aliases + Visitor(data).visit(node.arguments->children[1]); + for (const auto & name : local_aliases) + data.private_aliases.erase(name); + } +} + +void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) +{ + if (auto column_name = IdentifierSemantic::getColumnName(node)) + { + if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name)) + return; + + const auto & col = data.columns.get(*column_name); + if (col.default_desc.kind == ColumnDefaultKind::Alias) + { + ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context); + auto str = queryToString(ast); + 
//revisit ast to track recursive alias columns + Visitor(data).visit(ast); } } } + } diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index af6f09994e2..5e128dfebd9 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -10,6 +10,8 @@ namespace DB class IAST; using ASTPtr = std::shared_ptr; class IDataType; +class ASTFunction; +class ASTIdentifier; using DataTypePtr = std::shared_ptr; /// Visits AST node to rewrite alias columns in filter query @@ -22,14 +24,24 @@ public: struct Data { const ColumnsDescription & columns; + const NameSet & forbidden_columns; + const Context & context; - Data(const ColumnsDescription & columns_) + NameSet private_aliases; + + Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_) : columns(columns_) + , forbidden_columns(forbidden_columns_) + , context(context_) {} }; static void visit(ASTPtr & ast, Data & data); static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); + +private: + static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data); + static void visit(ASTFunction & node, ASTPtr & ast, Data & data); }; using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 0f36a9d64ad..de049edea8f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -31,7 +30,7 @@ #include #include #include -#include +#include #include #include @@ -1183,9 +1182,10 @@ void InterpreterSelectQuery::executeFetchColumns( else // It's possible to optimize count() given only partition predicates { SelectQueryInfo temp_query_info; - temp_query_info.query = 
replaceAliasColumnsInFilter(query_ptr->clone(), storage->getInMemoryMetadata().getColumns()); + temp_query_info.query = query_ptr; temp_query_info.syntax_analyzer_result = syntax_analyzer_result; temp_query_info.sets = query_analyzer->getPreparedSets(); + num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context); } if (num_rows) @@ -1292,11 +1292,10 @@ void InterpreterSelectQuery::executeFetchColumns( if (is_alias) { auto column_decl = storage_columns.get(column); - /// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer). - column_expr = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName()); + column_expr = column_default->expression->clone(); + // recursive visit for alias to alias - ColumnAliasesVisitor::Data data(storage_columns); - ColumnAliasesVisitor(data).visit(column_expr); + replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context); column_expr = setAlias(column_expr, column); } else @@ -1509,7 +1508,7 @@ void InterpreterSelectQuery::executeFetchColumns( getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result); - query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context); } StreamLocalLimits limits; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index b2dbd027191..dec0481e402 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -18,6 +18,7 @@ #include /// getSmallestColumn() #include #include +#include #include #include @@ -367,6 +368,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele } } + std::vector getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be aggregate functions inside the WHERE and PREWHERE. 
@@ -512,8 +514,8 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column) == partition_source_columns.end()) { - optimize_trivial_count = false; - break; + optimize_trivial_count = false; + break; } } } @@ -591,6 +593,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select required_source_columns.swap(source_columns); } +NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const +{ + NameSet forbidden_columns; + for (const auto & elem : array_join_result_to_source) + forbidden_columns.insert(elem.first); + return forbidden_columns; +} TreeRewriterResultPtr TreeRewriter::analyzeSelect( ASTPtr & query, @@ -654,6 +663,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->table_join); collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); + /// rewrite filters for select query, must after getArrayJoinedColumns + if (settings.optimize_alias_column_prediction && result.metadata_snapshot) + { + replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context); + } + result.aggregates = getAggregates(query, *select_query); result.collectUsedColumns(query, true); result.ast_join = select_query->join(); @@ -702,7 +717,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( else assertNoAggregates(query, "in wrong place"); - result.collectUsedColumns(query, false); + result.collectUsedColumns(query ,false); return std::make_shared(result); } diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 83cfabe2ec4..00be5cacc47 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -71,6 +71,7 @@ struct TreeRewriterResult void collectSourceColumns(bool add_special); void collectUsedColumns(const ASTPtr & query, bool is_select); Names requiredSourceColumns() 
const { return required_source_columns.getNames(); } + NameSet getArrayJoinSourceNameSet() const; const Scalars & getScalars() const { return scalars; } }; diff --git a/src/Interpreters/addTypeConversionToAST.cpp b/src/Interpreters/addTypeConversionToAST.cpp index 699c3bd27c3..18591fd732c 100644 --- a/src/Interpreters/addTypeConversionToAST.cpp +++ b/src/Interpreters/addTypeConversionToAST.cpp @@ -4,11 +4,20 @@ #include #include #include - +#include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int THERE_IS_NO_DEFAULT_VALUE; +} + ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) { auto func = makeASTFunction("CAST", ast, std::make_shared(type_name)); @@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) return func; } +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context) +{ + auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, all_columns); + const auto actions = ExpressionAnalyzer(ast, syntax_analyzer_result, context).getActions(true); + + for (const auto & action : actions->getActions()) + if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) + throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + + auto block = actions->getSampleBlock(); + + auto desc_type = block.getByName(ast->getColumnName()).type; + if (desc_type->getName() != type_name) + return addTypeConversionToAST(std::move(ast), type_name); + + return std::move(ast); +} + + } diff --git a/src/Interpreters/addTypeConversionToAST.h b/src/Interpreters/addTypeConversionToAST.h index 1951eebc3f5..61334403fe2 100644 --- a/src/Interpreters/addTypeConversionToAST.h +++ b/src/Interpreters/addTypeConversionToAST.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,4 +11,7 @@ namespace DB /// It will produce an expression with CAST 
to get an AST with the required type. ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name); +// If same type, then ignore the wrapper of CAST function +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context); + } diff --git a/src/Interpreters/replaceAliasColumnsInFilter.cpp b/src/Interpreters/replaceAliasColumnsInFilter.cpp deleted file mode 100644 index db1abe38cdd..00000000000 --- a/src/Interpreters/replaceAliasColumnsInFilter.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ - -ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns) -{ - auto & temp_select = ast->as(); - ColumnAliasesVisitor::Data aliase_column_data(columns); - ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); - if (temp_select.where()) - aliase_column_visitor.visit(temp_select.refWhere()); - if (temp_select.prewhere()) - aliase_column_visitor.visit(temp_select.refPrewhere()); - - return std::move(ast); -} - -} diff --git a/src/Interpreters/replaceAliasColumnsInFilter.h b/src/Interpreters/replaceAliasColumnsInFilter.h deleted file mode 100644 index c7599ad1725..00000000000 --- a/src/Interpreters/replaceAliasColumnsInFilter.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ColumnsDescription; -ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns); - -} diff --git a/src/Interpreters/replaceAliasColumnsInQuery.cpp b/src/Interpreters/replaceAliasColumnsInQuery.cpp new file mode 100644 index 00000000000..4daa787c397 --- /dev/null +++ b/src/Interpreters/replaceAliasColumnsInQuery.cpp @@ -0,0 +1,16 @@ +#include +#include +#include +#include + +namespace DB +{ + +void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context) +{ + ColumnAliasesVisitor::Data 
aliase_column_data(columns, forbidden_columns, context); + ColumnAliasesVisitor aliase_column_visitor(aliase_column_data); + aliase_column_visitor.visit(ast); +} + +} diff --git a/src/Interpreters/replaceAliasColumnsInQuery.h b/src/Interpreters/replaceAliasColumnsInQuery.h new file mode 100644 index 00000000000..bf7143ba099 --- /dev/null +++ b/src/Interpreters/replaceAliasColumnsInQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ColumnsDescription; +class Context; +void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context); + +} diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 0b88527ace4..1ed65194f77 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -153,7 +153,7 @@ SRCS( interpretSubquery.cpp join_common.cpp loadMetadata.cpp - replaceAliasColumnsInFilter.cpp + replaceAliasColumnsInQuery.cpp sortBlock.cpp ) diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 9690b51ff2f..5fc3ded0d58 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -50,6 +50,8 @@ public: ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); } ASTPtr & refWhere() { return getExpression(Expression::WHERE); } ASTPtr & refHaving() { return getExpression(Expression::HAVING); } + ASTPtr & refOrderBy() { return getExpression(Expression::ORDER_BY); } + ASTPtr & refGroupBy() { return getExpression(Expression::GROUP_BY); } const ASTPtr with() const { return getExpression(Expression::WITH); } const ASTPtr select() const { return getExpression(Expression::SELECT); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 0d7e29cffd5..98a08abab65 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -22,8 +22,6 @@ 
#include #include #include -#include -#include #include #include #include @@ -211,14 +209,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const auto & primary_key = metadata_snapshot->getPrimaryKey(); Names primary_key_columns = primary_key.column_names; - // query_info_for_index is a cloned SelectQueryInfo just for index - SelectQueryInfo query_info_for_index = query_info; - if (!metadata_snapshot->getColumns().getAliases().empty()) - { - query_info_for_index.query = replaceAliasColumnsInFilter(query_info.query->clone(), metadata_snapshot->getColumns()); - } - - KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression); + KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { @@ -230,8 +221,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::optional partition_pruner; if (data.minmax_idx_expr) { - minmax_idx_condition.emplace(query_info_for_index, context, data.minmax_idx_columns, data.minmax_idx_expr); - partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info_for_index, context, false /* strict */); + minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { @@ -295,6 +286,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( RelativeSize relative_sample_offset = 0; const auto & select = query_info.query->as(); + auto select_sample_size = select.sampleSize(); auto select_sample_offset = select.sampleOffset(); @@ -567,7 +559,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( for (const auto & index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = 
MergeTreeIndexFactory::instance().get(index); - auto condition = index_helper->createIndexCondition(query_info_for_index, context); + auto condition = index_helper->createIndexCondition(query_info, context); if (!condition->alwaysUnknownOrTrue()) useful_indices.emplace_back(index_helper, condition); } diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 3613fbff5cf..2961c8a420c 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include namespace DB @@ -26,11 +28,10 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( /// Do not analyze joined columns. /// They may have aliases and come to description as is. /// We can mismatch them with order key columns at stage of fetching columns. - for (const auto & elem : syntax_result->array_join_result_to_source) - forbidden_columns.insert(elem.first); + forbidden_columns = syntax_result->getArrayJoinSourceNameSet(); } -InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot) const +InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const { Names sorting_key_columns = metadata_snapshot->getSortingKeyColumns(); if (!metadata_snapshot->hasSortingKey()) @@ -40,6 +41,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & int read_direction = required_sort_description.at(0).direction; size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size()); + auto aliase_columns = metadata_snapshot->getColumns().getAliases(); for (size_t i = 0; i < prefix_size; ++i) { @@ -48,60 +50,92 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & /// Optimize in case of exact match with order key element /// or in some simple cases when order key element is wrapped into monotonic function. 
- int current_direction = required_sort_description[i].direction; - if (required_sort_description[i].column_name == sorting_key_columns[i] && current_direction == read_direction) - order_key_prefix_descr.push_back(required_sort_description[i]); - else + auto apply_order_judge = [&] (const ExpressionActions::Actions & actions, const String & sort_column) { - /// Allow only one simple monotonic functions with one argument - bool found_function = false; - for (const auto & action : elements_actions[i]->getActions()) + int current_direction = required_sort_description[i].direction; + /// For the path: order by (sort_column, ...) + if (sort_column == sorting_key_columns[i] && current_direction == read_direction) { - if (action.node->type != ActionsDAG::ActionType::FUNCTION) - continue; - - if (found_function) - { - current_direction = 0; - break; - } - else - found_function = true; - - if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i]) - { - current_direction = 0; - break; - } - - const auto & func = *action.node->function_base; - if (!func.hasInformationAboutMonotonicity()) - { - current_direction = 0; - break; - } - - auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {}); - if (!monotonicity.is_monotonic) - { - current_direction = 0; - break; - } - else if (!monotonicity.is_positive) - current_direction *= -1; + return true; } + /// For the path: order by (function(sort_column), ...) + /// Allow only one simple monotonic functions with one argument + /// Why not allow multi monotonic functions? 
+ else + { + bool found_function = false; - if (!found_function) - current_direction = 0; + for (const auto & action : actions) + { + if (action.node->type != ActionsDAG::ActionType::FUNCTION) + { + continue; + } - if (!current_direction || (i > 0 && current_direction != read_direction)) - break; + if (found_function) + { + current_direction = 0; + break; + } + else + found_function = true; - if (i == 0) - read_direction = current_direction; + if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i]) + { + current_direction = 0; + break; + } - order_key_prefix_descr.push_back(required_sort_description[i]); + const auto & func = *action.node->function_base; + if (!func.hasInformationAboutMonotonicity()) + { + current_direction = 0; + break; + } + + auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {}); + if (!monotonicity.is_monotonic) + { + current_direction = 0; + break; + } + else if (!monotonicity.is_positive) + current_direction *= -1; + } + + if (!found_function) + current_direction = 0; + + if (!current_direction || (i > 0 && current_direction != read_direction)) + return false; + + if (i == 0) + read_direction = current_direction; + + return true; + } + }; + + const auto & actions = elements_actions[i]->getActions(); + bool ok; + /// check if it's alias column + /// currently we only support alias column without any function wrapper + if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name)) + { + auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone(); + replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context); + + auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll()); + const auto expression_analyzer = 
ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true); + const auto & alias_actions = expression_analyzer->getActions(); + + ok = apply_order_judge(alias_actions, column_expr->getColumnName()); } + else + ok = apply_order_judge(actions, required_sort_description[i].column_name); + + if (ok) + order_key_prefix_descr.push_back(required_sort_description[i]); } if (order_key_prefix_descr.empty()) diff --git a/src/Storages/ReadInOrderOptimizer.h b/src/Storages/ReadInOrderOptimizer.h index 7a268189222..3676f4cc88c 100644 --- a/src/Storages/ReadInOrderOptimizer.h +++ b/src/Storages/ReadInOrderOptimizer.h @@ -12,6 +12,8 @@ namespace DB * common prefix, which is needed for * performing reading in order of PK. */ +class Context; + class ReadInOrderOptimizer { public: @@ -20,7 +22,7 @@ public: const SortDescription & required_sort_description, const TreeRewriterResultPtr & syntax_result); - InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot) const; + InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const; private: /// Actions for every element of order expression to analyze functions for monotonicity @@ -28,5 +30,4 @@ private: NameSet forbidden_columns; SortDescription required_sort_description; }; - } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a2c2325bcc1..657f1eccfe5 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -195,7 +195,7 @@ void StorageBuffer::read( if (dst_has_same_structure) { if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot, context); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. 
destination->read( diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index fba4adbbf96..a990ebb1e4b 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -136,7 +136,7 @@ void StorageMaterializedView::read( auto metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context); storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 3e6f99878a6..adb8af7dbec 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -211,7 +211,7 @@ Pipe StorageMerge::read( { auto storage_ptr = std::get<0>(*it); auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot); + auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, context); if (it == selected_tables.begin()) input_sorting_info = current_info; else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info)) diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 18600e6829a..7b362d78f91 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -15,6 +15,20 @@ alias2alias 1 1 1 -second_index +array-join +1 +0 0 +lambda +1 +optimize_read_in_order +2020-01-01 +optimize_aggregation_in_order +2020-01-01 10 +2020-01-02 10 +2020-01-03 10 +2020-01-01 10 +2020-01-02 10 +2020-01-03 10 +second-index 1 1 diff --git 
a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index c91bf4f6d29..610687210ae 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -1,5 +1,5 @@ -DROP TABLE IF EXISTS table_with_alias_column; -CREATE TABLE table_with_alias_column +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table ( `timestamp` DateTime, `value` UInt64, @@ -13,22 +13,21 @@ PARTITION BY toYYYYMMDD(timestamp) ORDER BY timestamp SETTINGS index_granularity = 1; -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10); - -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); - -INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); - +INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10); +INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); +INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); +set optimize_alias_column_prediction = 1; SELECT 'test-partition-prune'; -SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10; -SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); -SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11; -SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day, day as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); +SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10; +SELECT t = 
'2020-01-03' FROM (SELECT day AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11; +SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day, day AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); + + SELECT 'test-join'; - SELECT day = '2020-01-03' FROM ( @@ -38,39 +37,61 @@ FROM INNER JOIN ( SELECT day - FROM table_with_alias_column + FROM test_table WHERE day = '2020-01-03' - GROUP BY day SETTINGS max_rows_to_read = 11 -) AS b ON a.day = b.day; + GROUP BY day +) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11; SELECT day = '2020-01-01' FROM ( SELECT day - FROM table_with_alias_column + FROM test_table WHERE day = '2020-01-01' - GROUP BY day SETTINGS max_rows_to_read = 11 + GROUP BY day ) AS a INNER JOIN ( SELECT toDate('2020-01-01') AS day FROM numbers(1) -) AS b ON a.day = b.day; +) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11; SELECT 'alias2alias'; -SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10; -SELECT t = '2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); -SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); -SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11; -SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day1, day1 as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); -SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10; - -DROP TABLE table_with_alias_column; +SELECT COUNT() = 10 
FROM test_table WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10; +SELECT t = '2020-01-03' FROM (SELECT day1 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT t = '2020-01-03' FROM (SELECT day2 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10); +SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-03' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11; +SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day1, day1 AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t ); +SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10; -SELECT 'second_index'; +ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3]; +ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array; +SELECT 'array-join'; +set max_rows_to_read = 10; +SELECT count() == 10 FROM test_table WHERE day = '2020-01-01'; +SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01'); + +SELECT 'lambda'; +-- lambda parameters in filter should not be rewrite +SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03'; + +set max_rows_to_read = 0; +-- how to test it? 
currently just check logs, eg: 00940_order_by_read_in_order +SELECT 'optimize_read_in_order'; +SET optimize_read_in_order = 1; +SELECT day AS s FROM test_table ORDER BY s LIMIT 1; + +SELECT 'optimize_aggregation_in_order'; +SET optimize_aggregation_in_order = 1; +SELECT day, count() AS s FROM test_table GROUP BY day; +SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp); + +DROP TABLE test_table; + +SELECT 'second-index'; DROP TABLE IF EXISTS test_index; CREATE TABLE test_index ( @@ -84,6 +105,8 @@ PRIMARY KEY tuple() ORDER BY key_string SETTINGS index_granularity = 1; INSERT INTO test_index SELECT * FROM numbers(10); -SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 10; -SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 10; +set max_rows_to_read = 1; +SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1; +SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1; DROP TABLE IF EXISTS test_index; + From 9ff39da5d2dbb0dd3bf32f04e21c4486f2d67305 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 13 Dec 2020 09:55:56 +0800 Subject: [PATCH 12/78] fixes build --- src/Interpreters/ColumnAliasesVisitor.h | 2 +- src/Storages/ReadInOrderOptimizer.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index dcdf48d19d6..0bd4bd9fb9d 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -40,7 +40,7 @@ public: static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); private: - static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data); + static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data); static void visit(ASTFunction & node, ASTPtr & ast, Data & data); }; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp 
index 259f5d81fdd..e718583e7b9 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -1,14 +1,13 @@ #include -#include -#include -#include -#include -#include #include -#include +#include +#include #include #include +#include +#include +#include namespace DB { From d2209a14605ef82b10e26ff33a6e17f3ba36e9af Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 13 Dec 2020 17:33:02 +0800 Subject: [PATCH 13/78] trivial fix --- src/Interpreters/ColumnAliasesVisitor.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 3 ++- src/Interpreters/TreeRewriter.cpp | 6 +++--- src/Parsers/ASTSelectQuery.h | 2 -- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index 43462f2f9bf..c2a2ebfac1e 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -80,7 +80,7 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data { if (auto column_name = IdentifierSemantic::getColumnName(node)) { - if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name)) + if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name)) return; const auto & col = data.columns.get(*column_name); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c385161ec54..7c04d97d78a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1295,9 +1295,10 @@ void InterpreterSelectQuery::executeFetchColumns( { auto column_decl = storage_columns.get(column); column_expr = column_default->expression->clone(); - // recursive visit for alias to alias replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context); + + 
column_expr = addTypeConversionToAST(std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), *context); column_expr = setAlias(column_expr, column); } else diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 54991834495..b0c5ee12e4d 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -576,8 +576,8 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column) == partition_source_columns.end()) { - optimize_trivial_count = false; - break; + optimize_trivial_count = false; + break; } } } @@ -779,7 +779,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( else assertNoAggregates(query, "in wrong place"); - result.collectUsedColumns(query ,false); + result.collectUsedColumns(query, false); return std::make_shared(result); } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5fc3ded0d58..9690b51ff2f 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -50,8 +50,6 @@ public: ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); } ASTPtr & refWhere() { return getExpression(Expression::WHERE); } ASTPtr & refHaving() { return getExpression(Expression::HAVING); } - ASTPtr & refOrderBy() { return getExpression(Expression::ORDER_BY); } - ASTPtr & refGroupBy() { return getExpression(Expression::GROUP_BY); } const ASTPtr with() const { return getExpression(Expression::WITH); } const ASTPtr select() const { return getExpression(Expression::SELECT); } From 041f5c8843e836e08b63047d4ea2cea987bcbc5c Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 13 Dec 2020 19:30:53 +0800 Subject: [PATCH 14/78] add comments --- src/Interpreters/ColumnAliasesVisitor.h | 31 +++++++++++++++++++++-- src/Interpreters/addTypeConversionToAST.h | 4 +-- src/Storages/ReadInOrderOptimizer.cpp | 
3 ++- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index 0bd4bd9fb9d..b886b40af20 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -14,8 +14,35 @@ class ASTFunction; class ASTIdentifier; using DataTypePtr = std::shared_ptr; -/// Visits AST node to rewrite alias columns in filter query -/// Currently works only in `KeyCondition` of select query and `required_columns` in `InterpreterSelectQuery.cpp` +/// Visits AST node to rewrite alias columns in query +/// Currently works only 3 kind ways below + +/// For example: +// CREATE TABLE test_table +// ( +// `timestamp` DateTime, +// `value` UInt64, +// `day` Date ALIAS toDate(timestamp), +// `day1` Date ALIAS day + 1, +// `day2` Date ALIAS day1 + 1, +// `time` DateTime ALIAS timestamp +// )ENGINE = MergeTree +// PARTITION BY toYYYYMMDD(timestamp) +// ORDER BY timestamp SETTINGS index_granularity = 1; + +/// 1. Rewrite the filters in query when enable optimize_alias_column_prediction +/// this could help with `optimize_trivial_count`, Partition Prune in `KeyCondition` and secondary indexes. +/// eg: select max(value) from test_table where day2 = today(), filters will be: ((toDate(timestamp) + 1) + 1) = today() . + +/// 2. Alias on alias for `required_columns` extracted in `InterpreterSelectQuery.cpp`, it could help get all dependent physical columns for query. +/// eg: select day2 from test_table. `required_columns` can got require columns from the tempory rewrited AST `((toDate(timestamp) + 1) + 1)`. + +/// 3. Help with `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`: +/// For queries with alias columns in `orderBy` and `groupBy`, these ASTs will not change. 
+/// But we generate tempory asts and generate tempory Actions to get the `InputOrderInfo` +/// eg: select day1 from test_table order by day1; + + class ColumnAliasesMatcher { public: diff --git a/src/Interpreters/addTypeConversionToAST.h b/src/Interpreters/addTypeConversionToAST.h index 61334403fe2..16fa98f6e0c 100644 --- a/src/Interpreters/addTypeConversionToAST.h +++ b/src/Interpreters/addTypeConversionToAST.h @@ -2,12 +2,12 @@ #include #include -#include namespace DB { - +class Context; +class NamesAndTypesList; /// It will produce an expression with CAST to get an AST with the required type. ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name); diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index e718583e7b9..6d65d73676e 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -139,11 +139,12 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & if (ok) order_key_prefix_descr.push_back(required_sort_description[i]); + else + break; } if (order_key_prefix_descr.empty()) return {}; - return std::make_shared(std::move(order_key_prefix_descr), read_direction); } From d72d8ee7d8e29ef29c10ed9d08b3326ccbd002fc Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 15 Dec 2020 16:35:19 +0800 Subject: [PATCH 15/78] apply review advices --- src/Interpreters/ColumnAliasesVisitor.cpp | 8 +-- src/Interpreters/ColumnAliasesVisitor.h | 5 ++ src/Interpreters/TreeRewriter.cpp | 2 +- src/Storages/ReadInOrderOptimizer.cpp | 2 + .../01576_alias_column_rewrite.reference | 54 ++++++++++++++++--- .../01576_alias_column_rewrite.sql | 17 +++--- 6 files changed, 71 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index c2a2ebfac1e..24be7be8188 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -33,7 +33,6 
@@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) { - auto aa = queryToString(ast); // If it's select query, only replace filters. if (auto * query = ast->as()) { @@ -64,11 +63,14 @@ void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & da if (node.name == "lambda") { Names local_aliases; - for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node)) + auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node); + for (const auto & name : names_from_lambda) + { if (data.private_aliases.insert(name).second) { local_aliases.push_back(name); } + } /// visit child with masked local aliases Visitor(data).visit(node.arguments->children[1]); for (const auto & name : local_aliases) @@ -88,7 +90,7 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data { ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context); auto str = queryToString(ast); - //revisit ast to track recursive alias columns + // revisit ast to track recursive alias columns Visitor(data).visit(ast); } } diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index b886b40af20..f4e76eaeb10 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -51,9 +51,14 @@ public: struct Data { const ColumnsDescription & columns; + + /// forbidden_columns are from array join, we can't rewrite alias columns involved in array join. + /// Do not analyze joined columns. + /// They may have aliases and come to description as is. const NameSet & forbidden_columns; const Context & context; + /// private_aliases are from lambda, so these are local names. 
NameSet private_aliases; Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index b0c5ee12e4d..2f731d351d9 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -725,7 +725,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->table_join); collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); - /// rewrite filters for select query, must after getArrayJoinedColumns + /// rewrite filters for select query, must goes after getArrayJoinedColumns if (settings.optimize_alias_column_prediction && result.metadata_snapshot) { replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context); diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 6d65d73676e..eb5c0f98dca 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -123,6 +123,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & bool ok; /// check if it's alias column /// currently we only support alias column without any function wrapper + /// ie: `order by aliased_column` can have this optimization, but `order by function(aliased_column)` can not. + /// This suits most cases. 
if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name)) { auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone(); diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 7b362d78f91..b162effcd48 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -21,14 +21,54 @@ array-join lambda 1 optimize_read_in_order -2020-01-01 +Expression (Projection) + Limit (preliminary LIMIT) + MergingSorted (Merge sorted streams for ORDER BY) + MergeSorting (Merge sorted blocks for ORDER BY) + PartialSorting (Sort each block for ORDER BY) + Expression (Before ORDER BY and SELECT + Add table aliases) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (MergeTree) +Expression (Projection) + Limit (preliminary LIMIT) + FinishSorting + Expression (Before ORDER BY and SELECT + Add table aliases) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) +Expression (Projection) + Limit (preliminary LIMIT) + FinishSorting + Expression (Before ORDER BY and SELECT) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) optimize_aggregation_in_order -2020-01-01 10 -2020-01-02 10 -2020-01-03 10 -2020-01-01 10 -2020-01-02 10 -2020-01-03 10 +Expression (Projection + Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY + Add table aliases) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + 
ReadFromStorage (MergeTree) +Expression (Projection + Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY + Add table aliases) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) +Expression (Projection + Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) second-index 1 1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 610687210ae..0fcdf34b0ee 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -69,28 +69,33 @@ SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE d ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3]; ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array; + SELECT 'array-join'; set max_rows_to_read = 10; SELECT count() == 10 FROM test_table WHERE day = '2020-01-01'; SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01'); + SELECT 'lambda'; -- lambda parameters in filter should not be rewrite SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03'; set max_rows_to_read = 0; --- how to test it? 
currently just check logs, eg: 00940_order_by_read_in_order + SELECT 'optimize_read_in_order'; -SET optimize_read_in_order = 1; -SELECT day AS s FROM test_table ORDER BY s LIMIT 1; +EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0; +EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1; +EXPLAIN SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1; + SELECT 'optimize_aggregation_in_order'; -SET optimize_aggregation_in_order = 1; -SELECT day, count() AS s FROM test_table GROUP BY day; -SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp); +EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0; +EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1; +EXPLAIN SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1; DROP TABLE test_table; + SELECT 'second-index'; DROP TABLE IF EXISTS test_index; CREATE TABLE test_index From 24a190a461d7e0e9a013924bb4e399cdb829ffb5 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 16 Dec 2020 10:46:47 +0000 Subject: [PATCH 16/78] update setting description --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bcc45f8c5b4..d92f6b573f5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -367,7 +367,7 @@ class IColumn; M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same 
subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ - M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will rewrite the filter query with aliased columns, this could help with partition prune and secondary indexes. And also help with optimize_aggregation_in_order and optimize_read_in_order", 0) \ + M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ From 84807e9791b6a720632646ec5cd4f63ea9a10aff Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 17 Dec 2020 02:17:59 +0000 Subject: [PATCH 17/78] rename settings --- src/Core/Settings.h | 2 +- src/Interpreters/ColumnAliasesVisitor.h | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Storages/ReadInOrderOptimizer.cpp | 2 +- tests/queries/0_stateless/01576_alias_column_rewrite.sql | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d92f6b573f5..b3276f29113 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -367,7 +367,7 @@ class IColumn; M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, 
enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ - M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ + M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index f4e76eaeb10..a9cf8142321 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -30,7 +30,7 @@ using DataTypePtr = std::shared_ptr; // PARTITION BY toYYYYMMDD(timestamp) // ORDER BY timestamp SETTINGS index_granularity = 1; -/// 1. Rewrite the filters in query when enable optimize_alias_column_prediction +/// 1. Rewrite the filters in query when enable optimize_respect_aliases /// this could help with `optimize_trivial_count`, Partition Prune in `KeyCondition` and secondary indexes. 
/// eg: select max(value) from test_table where day2 = today(), filters will be: ((toDate(timestamp) + 1) + 1) = today() . diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2f731d351d9..945144335e9 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -726,7 +726,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); /// rewrite filters for select query, must goes after getArrayJoinedColumns - if (settings.optimize_alias_column_prediction && result.metadata_snapshot) + if (settings.optimize_respect_aliases && result.metadata_snapshot) { replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context); } diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index eb5c0f98dca..2b751329208 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -125,7 +125,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & /// currently we only support alias column without any function wrapper /// ie: `order by aliased_column` can have this optimization, but `order by function(aliased_column)` can not. /// This suits most cases. 
- if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name)) + if (context.getSettingsRef().optimize_respect_aliases && aliase_columns.contains(required_sort_description[i].column_name)) { auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone(); replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context); diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 0fcdf34b0ee..fabe20010af 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -17,7 +17,7 @@ INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00' INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); -set optimize_alias_column_prediction = 1; +set optimize_respect_aliases = 1; SELECT 'test-partition-prune'; SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10; From cfbb32ec87316721e03b30b89f626a63b701569f Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 20 Dec 2020 18:18:47 +0300 Subject: [PATCH 18/78] Upd - about modifiers --- docs/en/sql-reference/statements/insert-into.md | 12 ++++-------- docs/ru/sql-reference/statements/insert-into.md | 4 +--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index e55c10a5211..c38f8bc4641 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,9 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, 
v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)` or `(COLUMNS(c1, c2, c3))` syntax. - -Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax. +You can specify a list of columns to insert using the `(c1, c2, c3)` or `(COLUMNS(c1, c2, c3))` syntax. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). For example, consider the table: @@ -23,9 +21,8 @@ For example, consider the table: SHOW CREATE insert_select_testtable; ``` -``` -┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ CREATE TABLE insert_select_testtable +```text +CREATE TABLE insert_select_testtable ( `a` Int8, `b` String, @@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable; ) ENGINE = MergeTree() ORDER BY a -SETTINGS index_granularity = 8192 │ -└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +SETTINGS index_granularity = 8192 ``` ``` sql diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 690fbcf38c9..f93d726d04c 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,9 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `(COLUMNS(c1, c2, c3))`. 
- -Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`. +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)` или синтаксис `(COLUMNS(c1, c2, c3))`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или [модификаторами](../../sql-reference/statements/select/index.md#select-modifiers), такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). В качестве примера рассмотрим таблицу: From 1f10032299836e773e997033d691765d30799b3c Mon Sep 17 00:00:00 2001 From: spongedc Date: Sun, 27 Dec 2020 22:14:08 +0800 Subject: [PATCH 19/78] support EXISTS VIEW syntax --- src/Interpreters/InterpreterExistsQuery.cpp | 7 +++++++ src/Interpreters/InterpreterFactory.cpp | 4 ++++ src/Parsers/ParserTablePropertiesQuery.cpp | 20 +++++++++++++------ src/Parsers/TablePropertiesQueriesASTs.h | 10 ++++++++++ .../0_stateless/01048_exists_query.reference | 7 +++++++ .../0_stateless/01048_exists_query.sql | 14 +++++++++++++ 6 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 94a31db2e99..aeb5c0f9bcf 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -53,6 +53,13 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl() result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}, context); } } + else if ((exists_query = query_ptr->as())) + { + String database = context.resolveDatabase(exists_query->database); + context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table); + auto tbl = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context); + result = tbl != nullptr 
&& tbl->isView(); + } else if ((exists_query = query_ptr->as())) { String database = context.resolveDatabase(exists_query->database); diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 7c2e82e76c1..2f5493c3775 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -156,6 +156,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Parsers/ParserTablePropertiesQuery.cpp b/src/Parsers/ParserTablePropertiesQuery.cpp index dc080b2f13b..2b123138db9 100644 --- a/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/src/Parsers/ParserTablePropertiesQuery.cpp @@ -21,6 +21,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_create("CREATE"); ParserKeyword s_database("DATABASE"); ParserKeyword s_table("TABLE"); + ParserKeyword s_view("VIEW"); ParserKeyword s_dictionary("DICTIONARY"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p; @@ -30,6 +31,7 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & std::shared_ptr query; bool parse_only_database_name = false; + bool exists_view = false; bool temporary = false; if (s_exists.ignore(pos, expected)) @@ -39,6 +41,11 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query = std::make_shared(); parse_only_database_name = true; } + else if (s_view.ignore(pos, expected)) + { + query = std::make_shared(); + exists_view = true; + } else { if (s_temporary.ignore(pos, expected)) @@ -79,15 +86,16 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & } else { - if (temporary || s_temporary.ignore(pos, expected)) - query->temporary = true; - - if (!s_table.ignore(pos, expected)) - 
s_dictionary.ignore(pos, expected); + if (!exists_view) + { + if (temporary || s_temporary.ignore(pos, expected)) + query->temporary = true; + if (!s_table.ignore(pos, expected)) + s_dictionary.ignore(pos, expected); + } if (!name_p.parse(pos, table, expected)) return false; - if (s_dot.ignore(pos, expected)) { database = table; diff --git a/src/Parsers/TablePropertiesQueriesASTs.h b/src/Parsers/TablePropertiesQueriesASTs.h index 3e0798dbcbd..66c202433bd 100644 --- a/src/Parsers/TablePropertiesQueriesASTs.h +++ b/src/Parsers/TablePropertiesQueriesASTs.h @@ -22,6 +22,15 @@ struct ASTExistsTableQueryIDAndQueryNames static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; }; +struct ASTExistsViewQueryIDAndQueryNames +{ + static constexpr auto ID = "ExistsViewQuery"; + static constexpr auto Query = "EXISTS VIEW"; + /// No temporary view are supported, just for parsing + static constexpr auto QueryTemporary = ""; +}; + + struct ASTExistsDictionaryQueryIDAndQueryNames { static constexpr auto ID = "ExistsDictionaryQuery"; @@ -61,6 +70,7 @@ struct ASTDescribeQueryExistsQueryIDAndQueryNames using ASTExistsDatabaseQuery = ASTQueryWithTableAndOutputImpl; using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl; +using ASTExistsViewQuery = ASTQueryWithTableAndOutputImpl; using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl; using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl; using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl; diff --git a/tests/queries/0_stateless/01048_exists_query.reference b/tests/queries/0_stateless/01048_exists_query.reference index f1db7c70e71..ede3b4cdea7 100644 --- a/tests/queries/0_stateless/01048_exists_query.reference +++ b/tests/queries/0_stateless/01048_exists_query.reference @@ -21,6 +21,13 @@ 0 0 0 +1 +0 +0 +0 +0 +0 +0 0 0 0 diff --git a/tests/queries/0_stateless/01048_exists_query.sql b/tests/queries/0_stateless/01048_exists_query.sql index fca2c233c64..239f865fa99 100644 --- 
a/tests/queries/0_stateless/01048_exists_query.sql +++ b/tests/queries/0_stateless/01048_exists_query.sql @@ -40,6 +40,20 @@ EXISTS db_01048.t_01048; EXISTS TABLE db_01048.t_01048; EXISTS DICTIONARY db_01048.t_01048; + +CREATE TABLE db_01048.t_01048_2 (x UInt8) ENGINE = Memory; +CREATE VIEW db_01048.v_01048 AS SELECT * FROM db_01048.t_01048_2; +EXISTS VIEW db_01048.v_01048; +EXISTS VIEW db_01048.t_01048_2; +EXISTS VIEW db_01048.v_not_exist; +DROP VIEW db_01048.v_01048; +EXISTS VIEW db_01048.v_01048; +EXISTS VIEW db_01048.t_01048_2; +EXISTS VIEW db_01048.v_not_exist; +EXISTS VIEW db_not_exists.v_not_exist; +DROP TABLE db_01048.t_01048_2; + + DROP DATABASE db_01048; EXISTS db_01048.t_01048; EXISTS TABLE db_01048.t_01048; From 7a04724b3c98374e2384e922d58115b4b1245329 Mon Sep 17 00:00:00 2001 From: feng lv Date: Mon, 4 Jan 2021 14:23:17 +0000 Subject: [PATCH 20/78] add select all syntax --- src/Parsers/ExpressionListParsers.cpp | 4 ++++ src/Parsers/ParserSelectQuery.cpp | 12 ++++++++++++ .../0_stateless/01632_select_all_syntax.reference | 7 +++++++ .../queries/0_stateless/01632_select_all_syntax.sql | 9 +++++++++ 4 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/01632_select_all_syntax.reference create mode 100644 tests/queries/0_stateless/01632_select_all_syntax.sql diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index f50cf71f54d..6e169e9135c 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace DB { @@ -86,6 +87,9 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto parse_element = [&] { + ParserKeyword all("ALL"); + all.ignore(pos, expected); + ASTPtr element; if (!elem_parser->parse(pos, element, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 91c48fc362d..b1c07a777c9 100644 --- 
a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; extern const int FIRST_AND_NEXT_TOGETHER; + extern const int LOGICAL_ERROR; } @@ -30,6 +31,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) node = select_query; ParserKeyword s_select("SELECT"); + ParserKeyword s_all("ALL"); ParserKeyword s_distinct("DISTINCT"); ParserKeyword s_from("FROM"); ParserKeyword s_prewhere("PREWHERE"); @@ -93,12 +95,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// SELECT [DISTINCT] [TOP N [WITH TIES]] expr list { + bool has_all = false; if (!s_select.ignore(pos, expected)) return false; + if (s_all.ignore(pos, expected)) + has_all = true; + if (s_distinct.ignore(pos, expected)) select_query->distinct = true; + if (!has_all && s_all.ignore(pos, expected)) + has_all = true; + + if (has_all && select_query->distinct) + throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::LOGICAL_ERROR); + if (s_top.ignore(pos, expected)) { ParserNumber num; diff --git a/tests/queries/0_stateless/01632_select_all_syntax.reference b/tests/queries/0_stateless/01632_select_all_syntax.reference new file mode 100644 index 00000000000..47c2a8c9b59 --- /dev/null +++ b/tests/queries/0_stateless/01632_select_all_syntax.reference @@ -0,0 +1,7 @@ +a +a +1 +1 +2 +45 +45 diff --git a/tests/queries/0_stateless/01632_select_all_syntax.sql b/tests/queries/0_stateless/01632_select_all_syntax.sql new file mode 100644 index 00000000000..9415f5ef0c7 --- /dev/null +++ b/tests/queries/0_stateless/01632_select_all_syntax.sql @@ -0,0 +1,9 @@ +SELECT ALL 'a'; +SELECT DISTINCT 'a'; +SELECT ALL * FROM (SELECT 1 UNION ALL SELECT 1); +SELECT DISTINCT * FROM (SELECT 2 UNION ALL SELECT 2); +SELECT ALL DISTINCT 1; -- { clientError 49 } +SELECT DISTINCT ALL 1; -- { clientError 49 } + +SELECT 
sum(number) FROM numbers(10); +SELECT sum(ALL number) FROM numbers(10); From dd884349acc63bacfddda61fd307cb76c906785a Mon Sep 17 00:00:00 2001 From: feng lv Date: Mon, 4 Jan 2021 14:34:22 +0000 Subject: [PATCH 21/78] update document --- .../en/sql-reference/statements/select/all.md | 21 +++++++++++++++++++ .../sql-reference/statements/select/index.md | 1 + 2 files changed, 22 insertions(+) create mode 100644 docs/en/sql-reference/statements/select/all.md diff --git a/docs/en/sql-reference/statements/select/all.md b/docs/en/sql-reference/statements/select/all.md new file mode 100644 index 00000000000..5e0de4c142b --- /dev/null +++ b/docs/en/sql-reference/statements/select/all.md @@ -0,0 +1,21 @@ +--- +toc_title: ALL +--- + +# ALL Clause {#select-all} + +`SELECT ALL` is identical to `SELECT` without `DISTINCT`. + +- If `ALL` specified, ignore it. +- If both `ALL` and `DISTINCT` specified, exception will be thrown. + +`ALL` can also be specified inside aggregate function with the same effect(noop), for instance: + +```sql +SELECT sum(ALL number) FROM numbers(10); +``` +equals to + +```sql +SELECT sum(number) FROM numbers(10); +``` diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index ed69198ed4d..0c6a3449853 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -44,6 +44,7 @@ Specifics of each optional clause are covered in separate sections, which are li - [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) - [HAVING clause](../../../sql-reference/statements/select/having.md) - [SELECT clause](#select-clause) +- [ALL clause](../../../sql-reference/statements/select/all.md) - [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) - [LIMIT clause](../../../sql-reference/statements/select/limit.md) - [UNION clause](../../../sql-reference/statements/select/union.md) From 
c70ab6a18a412b00a9b46e184ec191ad2a330ddb Mon Sep 17 00:00:00 2001 From: feng lv Date: Mon, 4 Jan 2021 15:05:27 +0000 Subject: [PATCH 22/78] fix fix --- src/Parsers/ExpressionElementParsers.cpp | 22 ++++++++++++++++++- src/Parsers/ExpressionListParsers.cpp | 3 --- src/Parsers/ParserSelectQuery.cpp | 4 ++-- .../01632_select_all_syntax.reference | 3 +++ .../0_stateless/01632_select_all_syntax.sql | 10 +++++++-- 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 7c82c4aca1e..a71dbf153cb 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -261,10 +261,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier id_parser; ParserKeyword distinct("DISTINCT"); + ParserKeyword all("ALL"); ParserExpressionList contents(false); ParserSelectWithUnionQuery select; ParserKeyword over("OVER"); + bool has_all = false; bool has_distinct_modifier = false; ASTPtr identifier; @@ -279,10 +281,19 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; + if (all.ignore(pos, expected)) + has_all = true; if (distinct.ignore(pos, expected)) has_distinct_modifier = true; - else + + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && has_distinct_modifier) + throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); + + if (!has_distinct_modifier) { auto old_pos = pos; auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; @@ -356,9 +367,18 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr_list_params = expr_list_args; expr_list_args = nullptr; + if (all.ignore(pos, expected)) + has_all = true; + if (distinct.ignore(pos, expected)) has_distinct_modifier = true; + if (!has_all && all.ignore(pos, expected)) + has_all = true; + + if (has_all && 
has_distinct_modifier) + throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); + if (!contents.parse(pos, expr_list_args, expected)) return false; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6e169e9135c..4ee1128c2ef 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -87,9 +87,6 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto parse_element = [&] { - ParserKeyword all("ALL"); - all.ignore(pos, expected); - ASTPtr element; if (!elem_parser->parse(pos, element, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index b1c07a777c9..c72264cc001 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; extern const int FIRST_AND_NEXT_TOGETHER; - extern const int LOGICAL_ERROR; + extern const int SYNTAX_ERROR; } @@ -109,7 +109,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_all = true; if (has_all && select_query->distinct) - throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::LOGICAL_ERROR); + throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); if (s_top.ignore(pos, expected)) { diff --git a/tests/queries/0_stateless/01632_select_all_syntax.reference b/tests/queries/0_stateless/01632_select_all_syntax.reference index 47c2a8c9b59..3fad8c33238 100644 --- a/tests/queries/0_stateless/01632_select_all_syntax.reference +++ b/tests/queries/0_stateless/01632_select_all_syntax.reference @@ -5,3 +5,6 @@ a 2 45 45 +45 +2 +1 diff --git a/tests/queries/0_stateless/01632_select_all_syntax.sql b/tests/queries/0_stateless/01632_select_all_syntax.sql index 9415f5ef0c7..42fd63bc290 100644 --- 
a/tests/queries/0_stateless/01632_select_all_syntax.sql +++ b/tests/queries/0_stateless/01632_select_all_syntax.sql @@ -2,8 +2,14 @@ SELECT ALL 'a'; SELECT DISTINCT 'a'; SELECT ALL * FROM (SELECT 1 UNION ALL SELECT 1); SELECT DISTINCT * FROM (SELECT 2 UNION ALL SELECT 2); -SELECT ALL DISTINCT 1; -- { clientError 49 } -SELECT DISTINCT ALL 1; -- { clientError 49 } +SELECT ALL DISTINCT 1; -- { clientError 62 } +SELECT DISTINCT ALL 1; -- { clientError 62 } SELECT sum(number) FROM numbers(10); SELECT sum(ALL number) FROM numbers(10); +SELECT sum(DISTINCT number) FROM numbers(10); + +SELECT sum(ALL x) FROM (SELECT 1 x UNION ALL SELECT 1); +SELECT sum(DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); +SELECT sum(ALL DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); -- { clientError 62 } +SELECT sum(DISTINCT ALL x) FROM (SELECT 1 x UNION ALL SELECT 1); -- { clientError 62 } From 3ca5cf18a4fd041bc335924ada4d87640f9b31eb Mon Sep 17 00:00:00 2001 From: feng lv Date: Tue, 5 Jan 2021 04:54:02 +0000 Subject: [PATCH 23/78] remove throw exception fix --- src/Parsers/ExpressionElementParsers.cpp | 4 ++-- src/Parsers/ExpressionListParsers.cpp | 1 - src/Parsers/ParserSelectQuery.cpp | 3 +-- tests/queries/0_stateless/01632_select_all_syntax.sql | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a71dbf153cb..918674edbd6 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -291,7 +291,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_all = true; if (has_all && has_distinct_modifier) - throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); + return false; if (!has_distinct_modifier) { @@ -377,7 +377,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_all = true; if (has_all && has_distinct_modifier) - throw Exception("Can not use 
DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); + return false; if (!contents.parse(pos, expr_list_args, expected)) return false; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4ee1128c2ef..f50cf71f54d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -8,7 +8,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index c72264cc001..12fc1619705 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -21,7 +21,6 @@ namespace ErrorCodes extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; extern const int FIRST_AND_NEXT_TOGETHER; - extern const int SYNTAX_ERROR; } @@ -109,7 +108,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) has_all = true; if (has_all && select_query->distinct) - throw Exception("Can not use DISTINCT alongside ALL", ErrorCodes::SYNTAX_ERROR); + return false; if (s_top.ignore(pos, expected)) { diff --git a/tests/queries/0_stateless/01632_select_all_syntax.sql b/tests/queries/0_stateless/01632_select_all_syntax.sql index 42fd63bc290..03af91d9e33 100644 --- a/tests/queries/0_stateless/01632_select_all_syntax.sql +++ b/tests/queries/0_stateless/01632_select_all_syntax.sql @@ -2,8 +2,6 @@ SELECT ALL 'a'; SELECT DISTINCT 'a'; SELECT ALL * FROM (SELECT 1 UNION ALL SELECT 1); SELECT DISTINCT * FROM (SELECT 2 UNION ALL SELECT 2); -SELECT ALL DISTINCT 1; -- { clientError 62 } -SELECT DISTINCT ALL 1; -- { clientError 62 } SELECT sum(number) FROM numbers(10); SELECT sum(ALL number) FROM numbers(10); @@ -11,5 +9,3 @@ SELECT sum(DISTINCT number) FROM numbers(10); SELECT sum(ALL x) FROM (SELECT 1 x UNION ALL SELECT 1); SELECT sum(DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); -SELECT sum(ALL DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); -- { clientError 62 } -SELECT 
sum(DISTINCT ALL x) FROM (SELECT 1 x UNION ALL SELECT 1); -- { clientError 62 } From 5dbda5d6272723bf9c175c8f6664d34a33ee2012 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 8 Jan 2021 16:18:12 +0800 Subject: [PATCH 24/78] Update tests --- src/Interpreters/ColumnAliasesVisitor.h | 4 ++-- .../0_stateless/01576_alias_column_rewrite.reference | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index a9cf8142321..ea69cb2ab32 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -35,11 +35,11 @@ using DataTypePtr = std::shared_ptr; /// eg: select max(value) from test_table where day2 = today(), filters will be: ((toDate(timestamp) + 1) + 1) = today() . /// 2. Alias on alias for `required_columns` extracted in `InterpreterSelectQuery.cpp`, it could help get all dependent physical columns for query. -/// eg: select day2 from test_table. `required_columns` can got require columns from the tempory rewrited AST `((toDate(timestamp) + 1) + 1)`. +/// eg: select day2 from test_table. `required_columns` can got require columns from the temporary rewritten AST `((toDate(timestamp) + 1) + 1)`. /// 3. Help with `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`: /// For queries with alias columns in `orderBy` and `groupBy`, these ASTs will not change. 
-/// But we generate tempory asts and generate tempory Actions to get the `InputOrderInfo` +/// But we generate temporary asts and generate temporary Actions to get the `InputOrderInfo` /// eg: select day1 from test_table order by day1; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index b162effcd48..ebc8be4f79b 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -26,13 +26,13 @@ Expression (Projection) MergingSorted (Merge sorted streams for ORDER BY) MergeSorting (Merge sorted blocks for ORDER BY) PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY and SELECT + Add table aliases) + Expression (Before ORDER BY + Add table aliases) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (MergeTree) Expression (Projection) Limit (preliminary LIMIT) FinishSorting - Expression (Before ORDER BY and SELECT + Add table aliases) + Expression (Before ORDER BY + Add table aliases) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union ReadFromStorage (MergeTree with order) @@ -41,19 +41,19 @@ Expression (Projection) Expression (Projection) Limit (preliminary LIMIT) FinishSorting - Expression (Before ORDER BY and SELECT) + Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) optimize_aggregation_in_order -Expression (Projection + Before ORDER BY and SELECT) +Expression (Projection + Before ORDER BY) Aggregating Expression (Before GROUP BY + Add table aliases) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (MergeTree) -Expression (Projection + Before ORDER BY and SELECT) +Expression (Projection + Before 
ORDER BY) Aggregating Expression (Before GROUP BY + Add table aliases) SettingQuotaAndLimits (Set limits and quota after reading from storage) @@ -61,7 +61,7 @@ Expression (Projection + Before ORDER BY and SELECT) ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) ReadFromStorage (MergeTree with order) -Expression (Projection + Before ORDER BY and SELECT) +Expression (Projection + Before ORDER BY) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) From 5f962015caf6e48f3ec5ea115a8f424e88741c8b Mon Sep 17 00:00:00 2001 From: feng lv Date: Sun, 10 Jan 2021 14:51:06 +0000 Subject: [PATCH 25/78] fix fix style fix build --- src/Parsers/ExpressionElementParsers.cpp | 45 +++++++++++++++---- src/Parsers/ParserSelectQuery.cpp | 2 +- .../01632_select_all_syntax.reference | 25 +++++++++++ .../0_stateless/01632_select_all_syntax.sql | 14 ++++++ 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index d55ce3d62b7..501d3329593 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -267,7 +267,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword over("OVER"); bool has_all = false; - bool has_distinct_modifier = false; + bool has_distinct = false; ASTPtr identifier; ASTPtr query; @@ -281,19 +281,34 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; + auto pos_after_bracket = pos; + auto old_expected = expected; + if (all.ignore(pos, expected)) has_all = true; if (distinct.ignore(pos, expected)) - has_distinct_modifier = true; + has_distinct = true; if (!has_all && all.ignore(pos, expected)) has_all = true; - if (has_all && has_distinct_modifier) + if (has_all && has_distinct) return false; - if (!has_distinct_modifier) + if (has_all || has_distinct) + { + /// 
case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_all = false; + has_distinct = false; + } + } + + if (!has_distinct && !has_all) { auto old_pos = pos; auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; @@ -381,24 +396,38 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parametric aggregate functions cannot have DISTINCT in parameters list. - if (has_distinct_modifier) + if (has_distinct) return false; expr_list_params = expr_list_args; expr_list_args = nullptr; + pos_after_bracket = pos; + old_expected = expected; + if (all.ignore(pos, expected)) has_all = true; if (distinct.ignore(pos, expected)) - has_distinct_modifier = true; + has_distinct = true; if (!has_all && all.ignore(pos, expected)) has_all = true; - if (has_all && has_distinct_modifier) + if (has_all && has_distinct) return false; + if (has_all || has_distinct) + { + /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + pos = pos_after_bracket; + expected = old_expected; + has_distinct = false; + } + } + if (!contents.parse(pos, expr_list_args, expected)) return false; @@ -411,7 +440,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(identifier, function_node->name); /// func(DISTINCT ...) is equivalent to funcDistinct(...) 
- if (has_distinct_modifier) + if (has_distinct) function_node->name += "Distinct"; function_node->arguments = expr_list_args; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 12fc1619705..f515901edd4 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -92,7 +92,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - /// SELECT [DISTINCT] [TOP N [WITH TIES]] expr list + /// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list { bool has_all = false; if (!s_select.ignore(pos, expected)) diff --git a/tests/queries/0_stateless/01632_select_all_syntax.reference b/tests/queries/0_stateless/01632_select_all_syntax.reference index 3fad8c33238..c836beb205d 100644 --- a/tests/queries/0_stateless/01632_select_all_syntax.reference +++ b/tests/queries/0_stateless/01632_select_all_syntax.reference @@ -8,3 +8,28 @@ a 45 2 1 +1 +1 + +a +aa +aaa +aaaa +aaaaa +aaaaaa +aaaaaaa +aaaaaaaa +aaaaaaaaa + +a +aa +aaa +aaaa +aaaaa +aaaaaa +aaaaaaa +aaaaaaaa +aaaaaaaaa +aaaaa +aaaaa +aaaaa diff --git a/tests/queries/0_stateless/01632_select_all_syntax.sql b/tests/queries/0_stateless/01632_select_all_syntax.sql index 03af91d9e33..f5e96a5cb4e 100644 --- a/tests/queries/0_stateless/01632_select_all_syntax.sql +++ b/tests/queries/0_stateless/01632_select_all_syntax.sql @@ -9,3 +9,17 @@ SELECT sum(DISTINCT number) FROM numbers(10); SELECT sum(ALL x) FROM (SELECT 1 x UNION ALL SELECT 1); SELECT sum(DISTINCT x) FROM (SELECT 1 x UNION ALL SELECT 1); + +SELECT sum(ALL) FROM (SELECT 1 AS ALL); + +SELECT sum(DISTINCT) FROM (SELECT 1 AS DISTINCT); + +SELECT repeat('a', ALL) FROM (SELECT number AS ALL FROM numbers(10)); + +SELECT repeat('a', DISTINCT) FROM (SELECT number AS DISTINCT FROM numbers(10)); + +SELECT repeat(ALL, 5) FROM (SELECT 'a' AS ALL); + +SELECT repeat(DISTINCT, 5) FROM (SELECT 'a' AS DISTINCT); + +SELECT repeat(ALL, DISTINCT) FROM (SELECT 'a' AS ALL, 5 AS DISTINCT); From 
9f22bcefb0177dd663456c4be4cb8350b82ec057 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 11 Jan 2021 18:23:21 +0100 Subject: [PATCH 26/78] Docker: better server entrypoint --- docker/server/.dockerignore | 4 +- docker/server/.gitignore | 3 +- docker/server/Dockerfile.alpine | 2 +- docker/server/alpine-build.sh | 16 +-- docker/server/entrypoint.alpine.sh | 152 ----------------------------- docker/server/entrypoint.sh | 71 +++++++++----- 6 files changed, 58 insertions(+), 190 deletions(-) delete mode 100755 docker/server/entrypoint.alpine.sh mode change 100644 => 100755 docker/server/entrypoint.sh diff --git a/docker/server/.dockerignore b/docker/server/.dockerignore index 468a8cafb00..d360712c18f 100644 --- a/docker/server/.dockerignore +++ b/docker/server/.dockerignore @@ -4,5 +4,5 @@ alpine-root/install/* # docs (looks useless) alpine-root/usr/share/doc/* -# packages, etc. (used by prepare.sh) -alpine-root/tgz-packages/* \ No newline at end of file +# packages, etc. (used by alpine-build.sh) +tgz-packages/* diff --git a/docker/server/.gitignore b/docker/server/.gitignore index 4081b5f124c..692758d55aa 100644 --- a/docker/server/.gitignore +++ b/docker/server/.gitignore @@ -1 +1,2 @@ -alpine-root/* \ No newline at end of file +alpine-root/* +tgz-packages/* diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index fc2756eac8c..2de834e7b9c 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -16,7 +16,7 @@ RUN addgroup clickhouse \ && chown root:clickhouse /var/log/clickhouse-server \ && chmod 775 /var/log/clickhouse-server \ && chmod +x /entrypoint.sh \ - && apk add --no-cache su-exec + && apk add --no-cache su-exec bash EXPOSE 9000 8123 9009 diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index c9ba03f7f35..fcad1739c64 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -4,6 +4,7 @@ set -x REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / 
testing / prestable / etc REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" VERSION="${VERSION:-20.9.3.45}" +DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}" # where original files live DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" @@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" # we will create root for our image here CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" -# where to put downloaded tgz -TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages" - -# clean up the root from old runs +# clean up the root from old runs, it's reconstructed each time rm -rf "$CONTAINER_ROOT_FOLDER" +mkdir -p "$CONTAINER_ROOT_FOLDER" +# where to put downloaded tgz +TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages" mkdir -p "$TGZ_PACKAGES_FOLDER" PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) @@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) # download tars from the repo for package in "${PACKAGES[@]}" do - wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" + wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" done # unpack tars @@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \ "${CONTAINER_ROOT_FOLDER}/lib64" cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" -cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" +cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" ## get glibc components from ubuntu 20.04 and put them to expected place docker pull ubuntu:20.04 @@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 
"${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" -docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull +docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull +rm -rf "$CONTAINER_ROOT_FOLDER" \ No newline at end of file diff --git a/docker/server/entrypoint.alpine.sh b/docker/server/entrypoint.alpine.sh deleted file mode 100755 index f0cc62d276d..00000000000 --- a/docker/server/entrypoint.alpine.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/sh -#set -x - -DO_CHOWN=1 -if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then - DO_CHOWN=0 -fi - -CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" -CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" - -# support --user -if [ "$(id -u)" = "0" ]; then - USER=$CLICKHOUSE_UID - GROUP=$CLICKHOUSE_GID - # busybox has setuidgid & chpst buildin - gosu="su-exec $USER:$GROUP" -else - USER="$(id -u)" - GROUP="$(id -g)" - gosu="" - DO_CHOWN=0 -fi - -# set some vars -CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" - -# port is needed to check if clickhouse-server is ready for connections -HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)" - -# get CH directories locations -DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)" -TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)" -USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)" -LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)" -LOG_DIR="$(dirname "${LOG_PATH}" || true)" -ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)" -ERROR_LOG_DIR="$(dirname 
"${ERROR_LOG_PATH}" || true)" -FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)" - -CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" -CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" -CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" - -for dir in "$DATA_DIR" \ - "$ERROR_LOG_DIR" \ - "$LOG_DIR" \ - "$TMP_DIR" \ - "$USER_PATH" \ - "$FORMAT_SCHEMA_PATH" -do - # check if variable not empty - [ -z "$dir" ] && continue - # ensure directories exist - if ! mkdir -p "$dir"; then - echo "Couldn't create necessary directory: $dir" - exit 1 - fi - - if [ "$DO_CHOWN" = "1" ]; then - # ensure proper directories permissions - chown -R "$USER:$GROUP" "$dir" - elif [ "$(stat -c %u "$dir")" != "$USER" ]; then - echo "Necessary directory '$dir' isn't owned by user with id '$USER'" - exit 1 - fi -done - -# if clickhouse user is defined - create it (user "default" already exists out of box) -if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then - echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'" - cat < /etc/clickhouse-server/users.d/default-user.xml - - - - - - - - <${CLICKHOUSE_USER}> - default - - ::/0 - - ${CLICKHOUSE_PASSWORD} - default - - - -EOT -fi - -if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then - # Listen only on localhost until the initialization is done - $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 & - pid="$!" - - # check if clickhouse is ready to accept connections - # will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries) - tries=6 - while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do - if [ "$tries" -le "0" ]; then - echo >&2 'ClickHouse init process failed.' 
- exit 1 - fi - tries=$(( tries-1 )) - sleep 1 - done - - if [ -n "$CLICKHOUSE_PASSWORD" ]; then - printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD" - fi - - clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD " - - # create default database, if defined - if [ -n "$CLICKHOUSE_DB" ]; then - echo "$0: create database '$CLICKHOUSE_DB'" - "$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; - fi - - for f in /docker-entrypoint-initdb.d/*; do - case "$f" in - *.sh) - if [ -x "$f" ]; then - echo "$0: running $f" - "$f" - else - echo "$0: sourcing $f" - . "$f" - fi - ;; - *.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;; - *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;; - *) echo "$0: ignoring $f" ;; - esac - echo - done - - if ! kill -s TERM "$pid" || ! wait "$pid"; then - echo >&2 'Finishing of ClickHouse init process failed.' - exit 1 - fi -fi - -# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments -if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then - exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@" -fi - -# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image -exec "$@" diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh old mode 100644 new mode 100755 index 6048fdffe38..8a4d02a6014 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -1,7 +1,10 @@ #!/bin/bash +set -eo pipefail +shopt -s nullglob + DO_CHOWN=1 -if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then +if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then DO_CHOWN=0 fi @@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" # support --user -if [ x"$UID" == x0 ]; then +if [ "$(id -u)" = "0" ]; then USER=$CLICKHOUSE_UID GROUP=$CLICKHOUSE_GID 
- gosu="gosu $USER:$GROUP" + if command -v gosu &> /dev/null; then + gosu="gosu $USER:$GROUP" + elif command -v su-exec &> /dev/null; then + gosu="su-exec $USER:$GROUP" + else + echo "No gosu/su-exec detected!" + exit 1 + fi else USER="$(id -u)" GROUP="$(id -g)" @@ -23,18 +33,23 @@ fi # set some vars CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" +if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then + echo "Configuration file '$dir' isn't readable by user with id '$USER'" + exit 1 +fi + # port is needed to check if clickhouse-server is ready for connections -HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" +HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" # get CH directories locations -DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" -TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" -USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" -LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" -LOG_DIR="$(dirname $LOG_PATH || true)" -ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" -ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" -FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" +DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)" +TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)" +USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)" +LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || 
true)" +LOG_DIR="$(dirname "$LOG_PATH" || true)" +ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)" +ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)" +FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)" CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" @@ -58,8 +73,8 @@ do if [ "$DO_CHOWN" = "1" ]; then # ensure proper directories permissions chown -R "$USER:$GROUP" "$dir" - elif [ "$(stat -c %u "$dir")" != "$USER" ]; then - echo "Necessary directory '$dir' isn't owned by user with id '$USER'" + elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then + echo "Necessary directory '$dir' isn't accessible by user with id '$USER'" exit 1 fi done @@ -90,21 +105,22 @@ fi if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # Listen only on localhost until the initialization is done - $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 & + $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & pid="$!" # check if clickhouse is ready to accept connections - # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay) - if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then - echo >&2 'ClickHouse init process failed.' - exit 1 - fi + # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries) + tries=${CLICKHOUSE_INIT_TIMEOUT:-12} + while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do + if [ "$tries" -le "0" ]; then + echo >&2 'ClickHouse init process failed.' + exit 1 + fi + tries=$(( tries-1 )) + sleep 1 + done - if [ ! 
-z "$CLICKHOUSE_PASSWORD" ]; then - printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD" - fi - - clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD ) + clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) echo @@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then "$f" else echo "$0: sourcing $f" + # shellcheck source=/dev/null . "$f" fi ;; - *.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;; + *.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;; *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;; *) echo "$0: ignoring $f" ;; esac @@ -140,7 +157,7 @@ fi # if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then - exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@" + exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" fi # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image From f8dc5cc74001508b3ae6f2fd662fed3b7a8c4386 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Mon, 11 Jan 2021 18:35:20 +0100 Subject: [PATCH 27/78] Update alpine-build.sh --- docker/server/alpine-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index fcad1739c64..0142149b5bd 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -58,4 +58,4 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAIN docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t 
"${DOCKER_IMAGE}:${VERSION}-alpine" --pull -rm -rf "$CONTAINER_ROOT_FOLDER" \ No newline at end of file +rm -rf "$CONTAINER_ROOT_FOLDER" From 2244bc83518ddc6229dbe8677c57208fadaaa3f0 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Mon, 11 Jan 2021 22:09:36 +0300 Subject: [PATCH 28/78] Links removed in Russian --- docs/ru/sql-reference/statements/insert-into.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index f93d726d04c..3cacce08685 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)` или синтаксис `(COLUMNS(c1, c2, c3))`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или [модификаторами](../../sql-reference/statements/select/index.md#select-modifiers), такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)` или синтаксис `(COLUMNS(c1, c2, c3))`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. 
В качестве примера рассмотрим таблицу: From 9c3c1d13aba526a69cf94d8ba09e6e4d0eb8969d Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 6 Jan 2021 13:20:01 +0000 Subject: [PATCH 29/78] Add bitmapContains support for all UInt types --- src/Functions/FunctionsBitmap.h | 34 ++++++++++++------- ..._bitmapContains_with_primary_key.reference | 3 ++ .../00974_bitmapContains_with_primary_key.sql | 3 ++ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 93da4906658..601a7524213 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include + // TODO include this last because of a broken roaring header. See the comment // inside. #include @@ -724,10 +726,11 @@ public: throw Exception( "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto * arg_type1 = typeid_cast *>(arguments[1].get()); - if (!(arg_type1)) + + WhichDataType which(arguments[1].get()); + if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) throw Exception( - "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".", + "Second argument for function " + getName() + " must be UInt but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); @@ -765,27 +768,32 @@ private: { const IColumn * column_ptrs[2]; bool is_column_const[2]; - const PaddedPODArray * container0; - const PaddedPODArray * container1; - for (size_t i = 0; i < 2; ++i) - { - column_ptrs[i] = arguments[i].column.get(); - is_column_const[i] = isColumnConst(*column_ptrs[i]); - } + const PaddedPODArray * container0; + const PaddedPODArray * container1; + + 
column_ptrs[0] = arguments[0].column.get(); + is_column_const[0] = isColumnConst(*column_ptrs[0]); + if (is_column_const[0]) container0 = &typeid_cast(typeid_cast(column_ptrs[0])->getDataColumnPtr().get())->getData(); else container0 = &typeid_cast(column_ptrs[0])->getData(); + + // we can always cast the second column to ColumnUInt64 + auto super_type = std::make_shared(); + column_ptrs[1] = castColumn(arguments[1], super_type).get(); + is_column_const[1] = isColumnConst(*column_ptrs[1]); + if (is_column_const[1]) - container1 = &typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get())->getData(); + container1 = &typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get())->getData(); else - container1 = &typeid_cast(column_ptrs[1])->getData(); + container1 = &typeid_cast(column_ptrs[1])->getData(); for (size_t i = 0; i < input_rows_count; ++i) { const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; - const UInt32 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i]; + const UInt64 data1 = is_column_const[1] ? 
(*container1)[0] : (*container1)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast *>(data_ptr_0); vec_to[i] = bitmap_data_0.rbs.rb_contains(data1); diff --git a/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.reference b/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.reference index d00491fd7e5..98fb6a68656 100644 --- a/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.reference +++ b/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.reference @@ -1 +1,4 @@ 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.sql b/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.sql index 81dd7cab9f4..520b4a03057 100644 --- a/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.sql +++ b/tests/queries/0_stateless/00974_bitmapContains_with_primary_key.sql @@ -1,5 +1,8 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (num UInt64, str String) ENGINE = MergeTree ORDER BY num; INSERT INTO test (num) VALUES (1), (2), (10), (15), (23); +SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt8(num)); +SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt16(num)); SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt32(num)); +SELECT count(*) FROM test WHERE bitmapContains(bitmapBuild([1, 5, 7, 9]), toUInt64(num)); DROP TABLE test; From 72e9484dac9cf850a948ecb77de9675b055af1f8 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 6 Jan 2021 23:41:12 +0800 Subject: [PATCH 30/78] Fix a big bug --- src/Functions/FunctionsBitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 601a7524213..130906ac13f 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -781,8 +781,8 @@ private: container0 = 
&typeid_cast(column_ptrs[0])->getData(); // we can always cast the second column to ColumnUInt64 - auto super_type = std::make_shared(); - column_ptrs[1] = castColumn(arguments[1], super_type).get(); + auto uint64_column = castColumn(arguments[1], std::make_shared()); + column_ptrs[1] = uint64_column.get(); is_column_const[1] = isColumnConst(*column_ptrs[1]); if (is_column_const[1]) From 20ce82b86d07a13235a0d04fa72c732561e69f96 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 11 Jan 2021 11:51:53 +0800 Subject: [PATCH 31/78] Better exceptions --- src/Functions/FunctionsBitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 130906ac13f..5999886014c 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -730,7 +730,7 @@ public: WhichDataType which(arguments[1].get()); if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) throw Exception( - "Second argument for function " + getName() + " must be UInt but it has type " + arguments[1]->getName() + ".", + "Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); From e6351cf9665e664a877af7d7a05c55621d347cab Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 12 Jan 2021 17:13:43 +0800 Subject: [PATCH 32/78] Add UInt64 Support for bitmapTransform/bitmapSubsetInRange/bitmapSubsetLimit --- .../AggregateFunctionGroupBitmapData.h | 8 +- src/Functions/FunctionsBitmap.h | 95 +++++++++++-------- .../0_stateless/00829_bitmap_function.sql | 48 +++++----- 3 files changed, 86 insertions(+), 65 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 3acaa29de7e..39ab846e61e 100644 --- 
a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -419,7 +419,7 @@ public: if (isSmall()) return small.find(x) != small.end(); else - return rb->contains(x); + return rb->contains(static_cast(x)); } /** @@ -613,7 +613,7 @@ public: /** * Replace value */ - void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num) + void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num) { if (isSmall()) toLarge(); @@ -622,9 +622,9 @@ public: { if (from_vals[i] == to_vals[i]) continue; - bool changed = rb->removeChecked(from_vals[i]); + bool changed = rb->removeChecked(static_cast(from_vals[i])); if (changed) - rb->add(to_vals[i]); + rb->add(static_cast(to_vals[i])); } } }; diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 5999886014c..48d75381eeb 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -284,18 +284,16 @@ public: "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto * arg_type1 = typeid_cast *>(arguments[1].get()); - if (!(arg_type1)) - throw Exception( - "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * arg_type2 = typeid_cast *>(arguments[1].get()); - if (!(arg_type2)) - throw Exception( - "Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - + for (size_t i = 1; i < 3; ++i) + { + WhichDataType which(arguments[i].get()); + if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) + { + throw Exception( + "The second or thrid argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + 
arguments[1]->getName() + ".", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } return arguments[0]; } @@ -329,13 +327,23 @@ private: bool is_column_const[3]; const ColumnAggregateFunction * col_agg_func; const PaddedPODArray * container0; - const PaddedPODArray * container1, * container2; + const PaddedPODArray * container1, * container2; + ColumnPtr column_holder[2]; for (size_t i = 0; i < 3; ++i) { - column_ptrs[i] = arguments[i].column.get(); + if (i > 0) + { + column_holder[i - 1] = castColumn(arguments[i], std::make_shared()); + column_ptrs[i] = column_holder[i-1].get(); + } + else + { + column_ptrs[i] = arguments[i].column.get(); + } is_column_const[i] = isColumnConst(*column_ptrs[i]); } + if (is_column_const[0]) col_agg_func = typeid_cast(typeid_cast(column_ptrs[0])->getDataColumnPtr().get()); else @@ -343,13 +351,13 @@ private: container0 = &col_agg_func->getData(); if (is_column_const[1]) - container1 = &typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get())->getData(); + container1 = &typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get())->getData(); else - container1 = &typeid_cast(column_ptrs[1])->getData(); + container1 = &typeid_cast(column_ptrs[1])->getData(); if (is_column_const[2]) - container2 = &typeid_cast(typeid_cast(column_ptrs[2])->getDataColumnPtr().get())->getData(); + container2 = &typeid_cast(typeid_cast(column_ptrs[2])->getDataColumnPtr().get())->getData(); else - container2 = &typeid_cast(column_ptrs[2])->getData(); + container2 = &typeid_cast(column_ptrs[2])->getData(); auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction()); col_to->reserve(input_rows_count); @@ -359,8 +367,8 @@ private: const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast*>(data_ptr_0); - const UInt32 range_start = is_column_const[1] ? 
(*container1)[0] : (*container1)[i]; - const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i]; + const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i]; + const UInt64 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i]; col_to->insertDefault(); AggregateFunctionGroupBitmapData & bitmap_data_2 @@ -376,7 +384,7 @@ struct BitmapSubsetInRangeImpl public: static constexpr auto name = "bitmapSubsetInRange"; template - static void apply(const AggregateFunctionGroupBitmapData & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData & bitmap_data_2) + static void apply(const AggregateFunctionGroupBitmapData & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData & bitmap_data_2) { bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs); } @@ -387,7 +395,7 @@ struct BitmapSubsetLimitImpl public: static constexpr auto name = "bitmapSubsetLimit"; template - static void apply(const AggregateFunctionGroupBitmapData & bitmap_data_0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData & bitmap_data_2) + static void apply(const AggregateFunctionGroupBitmapData & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData & bitmap_data_2) { bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs); } @@ -420,14 +428,14 @@ public: for (size_t i = 0; i < 2; ++i) { const auto * array_type = typeid_cast(arguments[i + 1].get()); - String msg(i == 0 ? 
"Second" : "Third"); - msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + "."; + String msg = "The second or thrid argument for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it has type " + arguments[i + 1]->getName() + "."; + if (!array_type) throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto nested_type = array_type->getNestedType(); WhichDataType which(nested_type); - if (!which.isUInt32()) + if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return arguments[0]; @@ -463,13 +471,26 @@ private: bool is_column_const[3]; const ColumnAggregateFunction * col_agg_func; const PaddedPODArray * container0; - const ColumnArray * array; + const ColumnArray * array1; + const ColumnArray * array2; + + ColumnPtr column_holder[2]; for (size_t i = 0; i < 3; ++i) { - column_ptrs[i] = arguments[i].column.get(); + if (i > 0) + { + auto array_type = std::make_shared(std::make_shared()); + column_holder[i - 1] = castColumn(arguments[i], array_type); + column_ptrs[i] = column_holder[i-1].get(); + } + else + { + column_ptrs[i] = arguments[i].column.get(); + } is_column_const[i] = isColumnConst(*column_ptrs[i]); } + if (is_column_const[0]) { col_agg_func = typeid_cast(typeid_cast(column_ptrs[0])->getDataColumnPtr().get()); @@ -481,21 +502,20 @@ private: container0 = &col_agg_func->getData(); if (is_column_const[1]) - array = typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get()); + array1 = typeid_cast(typeid_cast(column_ptrs[1])->getDataColumnPtr().get()); else - { - array = typeid_cast(arguments[1].column.get()); - } - const ColumnArray::Offsets & from_offsets = array->getOffsets(); - const ColumnVector::Container & from_container = typeid_cast *>(&array->getData())->getData(); + array1 = typeid_cast(column_ptrs[1]); + + const 
ColumnArray::Offsets & from_offsets = array1->getOffsets(); + const ColumnVector::Container & from_container = typeid_cast *>(&array1->getData())->getData(); if (is_column_const[2]) - array = typeid_cast(typeid_cast(column_ptrs[2])->getDataColumnPtr().get()); + array2 = typeid_cast(typeid_cast(column_ptrs[2])->getDataColumnPtr().get()); else - array = typeid_cast(arguments[2].column.get()); + array2 = typeid_cast(column_ptrs[2]); - const ColumnArray::Offsets & to_offsets = array->getOffsets(); - const ColumnVector::Container & to_container = typeid_cast *>(&array->getData())->getData(); + const ColumnArray::Offsets & to_offsets = array2->getOffsets(); + const ColumnVector::Container & to_container = typeid_cast *>(&array2->getData())->getData(); auto col_to = ColumnAggregateFunction::create(col_agg_func->getAggregateFunction()); col_to->reserve(input_rows_count); @@ -528,6 +548,7 @@ private: to_start = i == 0 ? 0 : to_offsets[i - 1]; to_end = to_offsets[i]; } + if (from_end - from_start != to_end - to_start) throw Exception("From array size and to array size mismatch", ErrorCodes::LOGICAL_ERROR); diff --git a/tests/queries/0_stateless/00829_bitmap_function.sql b/tests/queries/0_stateless/00829_bitmap_function.sql index 3ed2ae5530e..b9e9664a56e 100644 --- a/tests/queries/0_stateless/00829_bitmap_function.sql +++ b/tests/queries/0_stateless/00829_bitmap_function.sql @@ -137,10 +137,10 @@ DROP TABLE IF EXISTS bitmap_column_expr_test3; CREATE TABLE bitmap_column_expr_test3 ( tag_id String, - z AggregateFunction(groupBitmap, UInt32), + z AggregateFunction(groupBitmap, UInt64), replace Nested ( - from UInt32, - to UInt32 + from UInt16, + to UInt64 ) ) ENGINE = MergeTree @@ -149,10 +149,10 @@ ORDER BY tag_id; DROP TABLE IF EXISTS numbers10; CREATE VIEW numbers10 AS SELECT number FROM system.numbers LIMIT 10; -INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag1', groupBitmapState(toUInt32(number)), cast([] as Array(UInt32)), cast([] as 
Array(UInt32)) FROM numbers10; -INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag2', groupBitmapState(toUInt32(number)), cast([0] as Array(UInt32)), cast([2] as Array(UInt32)) FROM numbers10; -INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag3', groupBitmapState(toUInt32(number)), cast([0,7] as Array(UInt32)), cast([3,101] as Array(UInt32)) FROM numbers10; -INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag4', groupBitmapState(toUInt32(number)), cast([5,999,2] as Array(UInt32)), cast([2,888,20] as Array(UInt32)) FROM numbers10; +INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag1', groupBitmapState(toUInt64(number)), cast([] as Array(UInt16)), cast([] as Array(UInt64)) FROM numbers10; +INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag2', groupBitmapState(toUInt64(number)), cast([0] as Array(UInt16)), cast([2] as Array(UInt64)) FROM numbers10; +INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag3', groupBitmapState(toUInt64(number)), cast([0,7] as Array(UInt16)), cast([3,101] as Array(UInt64)) FROM numbers10; +INSERT INTO bitmap_column_expr_test3(tag_id, z, replace.from, replace.to) SELECT 'tag4', groupBitmapState(toUInt64(number)), cast([5,999,2] as Array(UInt16)), cast([2,888,20] as Array(UInt64)) FROM numbers10; SELECT tag_id, bitmapToArray(z), replace.from, replace.to, bitmapToArray(bitmapTransform(z, replace.from, replace.to)) FROM bitmap_column_expr_test3 ORDER BY tag_id; @@ -232,11 +232,11 @@ select bitmapHasAll(bitmapBuild([ -- bitmapContains: ---- Empty -SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt32(0)); -SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt32(5)); +SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt8(0)); +SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt16(5)); ---- Small select 
bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(0)); -select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(9)); +select bitmapContains(bitmapBuild([1,5,7,9]),toUInt64(9)); ---- Large select bitmapContains(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, @@ -250,31 +250,31 @@ select bitmapContains(bitmapBuild([ -- bitmapSubsetInRange: ---- Empty -SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10))); -SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt32()), toUInt64(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt64(10))); ---- Small -select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4))); -select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10))); -select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt8(0), toUInt16(4))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt64(10))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt64(3), toUInt32(7))); ---- Large select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]), toUInt32(0), toUInt32(100))); + 100,200,500]), toUInt8(0), toUInt32(100))); select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]), toUInt32(30), toUInt32(200))); + 100,200,500]), toUInt64(30), toUInt32(200))); select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]), toUInt32(100), toUInt32(200))); + 100,200,500]), toUInt32(100), toUInt64(200))); -- bitmapSubsetLimit: ---- Empty -SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10))); -SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt8(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt64(10))); ---- Small -select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4))); -select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10))); -select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7))); +select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt8(0), toUInt32(4))); +select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt64(10))); +select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt16(3), toUInt32(7))); ---- Large select bitmapToArray(bitmapSubsetLimit(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, @@ -284,7 +284,7 @@ select bitmapToArray(bitmapSubsetLimit(bitmapBuild([ 100,200,500]), toUInt32(30), toUInt32(200))); select bitmapToArray(bitmapSubsetLimit(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]), toUInt32(100), toUInt32(200))); + 100,200,500]), toUInt32(100), toUInt16(200))); -- bitmapMin: ---- Empty From 7cfbc90fa525c4c974d2af1369dd991a48461700 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 12 Jan 2021 10:00:03 +0000 Subject: [PATCH 33/78] Fix typo --- src/Functions/FunctionsBitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 48d75381eeb..cd5e23acbf3 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -290,7 +290,7 @@ public: if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) { throw Exception( - "The second or thrid argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", + "The second or third argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -428,7 +428,7 @@ public: for (size_t i = 0; i < 2; ++i) { const auto * array_type = typeid_cast(arguments[i + 1].get()); - String msg = "The second or thrid argument for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it has type " + arguments[i + 1]->getName() + "."; + String msg = "The second or third argument for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it has type " + arguments[i + 1]->getName() + "."; if (!array_type) throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From aa6e827d8b2882553f06fe6aadc21205d40b9ab5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 12 Jan 2021 16:54:11 +0300 Subject: [PATCH 34/78] Update TreeRewriter.cpp --- src/Interpreters/TreeRewriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 17b182c4594..554fedeed64 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -802,7 +802,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->table_join); collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); - 
/// rewrite filters for select query, must goes after getArrayJoinedColumns + /// rewrite filters for select query, must go after getArrayJoinedColumns if (settings.optimize_respect_aliases && result.metadata_snapshot) { replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context); From ff6c3c75c2f4577f54dda7c47f6ad7db278e1784 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 12 Jan 2021 17:34:50 +0300 Subject: [PATCH 35/78] add protection from unsafe allocations --- base/daemon/BaseDaemon.cpp | 3 +++ src/Common/MemoryTracker.cpp | 12 ++++++++++++ src/Common/MemoryTracker.h | 18 ++++++++++++++++++ src/Common/QueryProfiler.cpp | 2 ++ src/Common/ThreadFuzzer.cpp | 1 + src/Common/ThreadPool.cpp | 4 +++- .../System/StorageSystemStackTrace.cpp | 1 + 7 files changed, 40 insertions(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 830e7857a1f..4cf8a8d7ce9 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -112,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig) /** Signal handler for HUP / USR1 */ static void closeLogsSignalHandler(int sig, siginfo_t *, void *) { + DENY_ALLOCATIONS_IN_SCOPE; writeSignalIDtoSignalPipe(sig); } static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) { + DENY_ALLOCATIONS_IN_SCOPE; writeSignalIDtoSignalPipe(sig); } @@ -125,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) */ static void signalHandler(int sig, siginfo_t * info, void * context) { + DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; /// We must restore previous value of errno in signal handler. 
char buf[signal_pipe_buf_size]; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 4b0e1f9cada..e62f15d4fd1 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -12,6 +12,10 @@ #include #include +#ifdef MEMORY_TRACKER_DEBUG_CHECKS +thread_local bool _memory_tracker_always_throw_logical_error_on_allocation = false; +#endif + namespace { @@ -165,6 +169,14 @@ void MemoryTracker::alloc(Int64 size) } } +#ifdef MEMORY_TRACKER_DEBUG_CHECKS + if (unlikely(_memory_tracker_always_throw_logical_error_on_allocation)) + { + _memory_tracker_always_throw_logical_error_on_allocation = false; + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed."); + } +#endif + std::bernoulli_distribution fault(fault_probability); if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true)) { diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index bfe03d19a27..b67f9e368e2 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -5,6 +5,24 @@ #include #include +#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) +#define MEMORY_TRACKER_DEBUG_CHECKS +#endif + +#ifdef MEMORY_TRACKER_DEBUG_CHECKS +#include +extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation; +#define ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) \ + bool _allocations_flag_prev_val##n = _memory_tracker_always_throw_logical_error_on_allocation; \ + _memory_tracker_always_throw_logical_error_on_allocation = val; \ + SCOPE_EXIT({ _memory_tracker_always_throw_logical_error_on_allocation = _allocations_flag_prev_val##n; }) +#define ALLOCATIONS_IN_SCOPE_IMPL(n, val) ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) +#define DENY_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, true) +#define ALLOW_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, 
false) +#else +#define DENY_ALLOCATIONS_IN_SCOPE static_assert(true) +#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true) +#endif /** Tracks memory consumption. * It throws an exception if amount of consumed memory become greater than certain limit. diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 504d884dce0..bd1cab42be3 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -181,6 +181,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context) { + DENY_ALLOCATIONS_IN_SCOPE; writeTraceInfo(TraceType::Real, sig, info, context); } @@ -190,6 +191,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period) void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context) { + DENY_ALLOCATIONS_IN_SCOPE; writeTraceInfo(TraceType::CPU, sig, info, context); } diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 88ff53534e6..a538ba7a49a 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -197,6 +197,7 @@ static void injection( void ThreadFuzzer::signalHandler(int) { + DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; auto & fuzzer = ThreadFuzzer::instance(); diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 7fc0d65aa5b..4b6834bd235 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -208,6 +208,7 @@ size_t ThreadPoolImpl::active() const template void ThreadPoolImpl::worker(typename std::list::iterator thread_it) { + DENY_ALLOCATIONS_IN_SCOPE; CurrentMetrics::Increment metric_all_threads( std::is_same_v ? 
CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread); @@ -223,7 +224,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ if (!jobs.empty()) { - job = std::move(jobs.top().job); + job = std::move(const_cast(jobs.top().job)); jobs.pop(); } else @@ -237,6 +238,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ { try { + ALLOW_ALLOCATIONS_IN_SCOPE; CurrentMetrics::Increment metric_active_threads( std::is_same_v ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive); diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 0b5e82a1f3d..abb2fdf54ed 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -60,6 +60,7 @@ namespace void signalHandler(int, siginfo_t * info, void * context) { + DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; /// We must restore previous value of errno in signal handler. /// In case malicious user is sending signals manually (for unknown reason). From c51df126f123bfec3e97926b6b082d8f86e855cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jan 2021 19:46:18 +0300 Subject: [PATCH 36/78] Disable "optimize_move_functions_out_of_any" by default #18973 --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0c352544523..d3575b1a307 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -372,7 +372,7 @@ class IColumn; M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 
0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ - M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ + M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ From a5e14e94422961cf87f3f2c447f2996809bc8f65 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jan 2021 22:53:58 +0300 Subject: [PATCH 37/78] Add a test --- tests/queries/0_stateless/01650_any_null_if.reference | 1 + tests/queries/0_stateless/01650_any_null_if.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/01650_any_null_if.reference create mode 100644 tests/queries/0_stateless/01650_any_null_if.sql diff --git a/tests/queries/0_stateless/01650_any_null_if.reference b/tests/queries/0_stateless/01650_any_null_if.reference new file mode 100644 index 00000000000..e965047ad7c --- /dev/null +++ b/tests/queries/0_stateless/01650_any_null_if.reference @@ -0,0 +1 @@ +Hello diff --git a/tests/queries/0_stateless/01650_any_null_if.sql b/tests/queries/0_stateless/01650_any_null_if.sql new file mode 100644 index 00000000000..487a42cf578 --- /dev/null +++ b/tests/queries/0_stateless/01650_any_null_if.sql @@ -0,0 +1 @@ +SELECT any(nullIf(s, '')) FROM (SELECT arrayJoin(['', 'Hello']) AS s); From a6296912f355f3be682fb885b2626ccbe22ce97e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jan 2021 22:58:54 +0300 Subject: [PATCH 38/78] Fix incomplete code --- src/Interpreters/RewriteAnyFunctionVisitor.cpp | 3 
+++ tests/queries/0_stateless/01650_any_null_if.sql | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/Interpreters/RewriteAnyFunctionVisitor.cpp b/src/Interpreters/RewriteAnyFunctionVisitor.cpp index e8f05962862..f7c128a5749 100644 --- a/src/Interpreters/RewriteAnyFunctionVisitor.cpp +++ b/src/Interpreters/RewriteAnyFunctionVisitor.cpp @@ -78,6 +78,9 @@ void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Da auto & func_arguments = func.arguments->children; + if (func_arguments.size() != 1) + return; + const auto * first_arg_func = func_arguments[0]->as(); if (!first_arg_func || first_arg_func->arguments->children.empty()) return; diff --git a/tests/queries/0_stateless/01650_any_null_if.sql b/tests/queries/0_stateless/01650_any_null_if.sql index 487a42cf578..17f57e92032 100644 --- a/tests/queries/0_stateless/01650_any_null_if.sql +++ b/tests/queries/0_stateless/01650_any_null_if.sql @@ -1 +1,6 @@ SELECT any(nullIf(s, '')) FROM (SELECT arrayJoin(['', 'Hello']) AS s); + +SET optimize_move_functions_out_of_any = 0; +EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 } +SET optimize_move_functions_out_of_any = 1; +EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 } From 781a1110bce705638a933bb8aafc4ee38fc448a4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 12 Jan 2021 23:24:18 +0300 Subject: [PATCH 39/78] Update ColumnAliasesVisitor.h --- src/Interpreters/ColumnAliasesVisitor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index ea69cb2ab32..2cd9ad796cd 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -32,10 +32,10 @@ using DataTypePtr = std::shared_ptr; /// 1. 
Rewrite the filters in query when enable optimize_respect_aliases /// this could help with `optimize_trivial_count`, Partition Prune in `KeyCondition` and secondary indexes. -/// eg: select max(value) from test_table where day2 = today(), filters will be: ((toDate(timestamp) + 1) + 1) = today() . +/// eg: select max(value) from test_table where day2 = today(), filters will be: ((toDate(timestamp) + 1) + 1) = today() . /// 2. Alias on alias for `required_columns` extracted in `InterpreterSelectQuery.cpp`, it could help get all dependent physical columns for query. -/// eg: select day2 from test_table. `required_columns` can got require columns from the temporary rewritten AST `((toDate(timestamp) + 1) + 1)`. +/// eg: select day2 from test_table. `required_columns` can got require columns from the temporary rewritten AST `((toDate(timestamp) + 1) + 1)`. /// 3. Help with `optimize_aggregation_in_order` and `optimize_read_in_order` in `ReadInOrderOptimizer.cpp`: /// For queries with alias columns in `orderBy` and `groupBy`, these ASTs will not change. From e1afae449283b96fc99bd52c826fa278de0276cb Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 13 Jan 2021 00:49:11 +0300 Subject: [PATCH 40/78] just test --- src/Interpreters/TableJoin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5db914bc457..6622ea2dcd6 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -234,8 +234,8 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & c /// Materialize column. /// Column is not empty if it is constant, but after Join all constants will be materialized. /// So, we need remove constants from header. 
- if (col.column) - col.column = nullptr; + // if (col.column) + // col.column = nullptr; if (leftBecomeNullable(col.type)) col.type = makeNullable(col.type); From 7516f1460faf53f9bdf3c789adcf27735652fa3a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 Nov 2020 12:08:32 +0300 Subject: [PATCH 41/78] Add a test for ANY JOIN and PREWHERE (cherry picked from commit 92c5cfe2439d2b13342a5eabdaf7b25d3ace6038) --- .../01582_any_join_supertype.reference | 3 +++ .../0_stateless/01582_any_join_supertype.sql | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/01582_any_join_supertype.reference create mode 100644 tests/queries/0_stateless/01582_any_join_supertype.sql diff --git a/tests/queries/0_stateless/01582_any_join_supertype.reference b/tests/queries/0_stateless/01582_any_join_supertype.reference new file mode 100644 index 00000000000..b4f45c4e403 --- /dev/null +++ b/tests/queries/0_stateless/01582_any_join_supertype.reference @@ -0,0 +1,3 @@ +1 +2020-01-01 10:00:00 +2020-01-01 10:00:00 diff --git a/tests/queries/0_stateless/01582_any_join_supertype.sql b/tests/queries/0_stateless/01582_any_join_supertype.sql new file mode 100644 index 00000000000..67b7d3ad087 --- /dev/null +++ b/tests/queries/0_stateless/01582_any_join_supertype.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS bar; + +CREATE TABLE foo (server_date Date, server_time Datetime('Europe/Moscow'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); +CREATE TABLE bar (server_date Date, dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); + +INSERT INTO foo VALUES ('2020-01-01', '2020-01-01 12:00:00', 'test1'), ('2020-01-01', '2020-01-01 13:00:00', 'test2'); +INSERT INTO bar VALUES ('2020-01-01', 'test2'), ('2020-01-01', 'test3'); + +SET optimize_move_to_prewhere = 1; + +SELECT count() +FROM foo ANY INNER JOIN bar USING (dimension_1) 
+WHERE (foo.server_date <= '2020-11-07') AND (toDate(foo.server_time, 'Asia/Yekaterinburg') <= '2020-11-07'); + +SELECT toDateTime(foo.server_time, 'UTC') +FROM foo +ANY INNER JOIN bar USING (dimension_1) +WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30'); + +SELECT toDateTime(foo.server_time, 'UTC') FROM foo +SEMI JOIN bar USING (dimension_1) WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30'); + +DROP TABLE foo; +DROP TABLE bar; From d58a8c98aa1528da30a081cedfc5af9585198dec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 Nov 2020 12:10:38 +0300 Subject: [PATCH 42/78] More tests (cherry picked from commit 235541a532baf374f1745c3ae732ff865b4c8850) --- .../0_stateless/01582_any_join_supertype.reference | 2 ++ .../queries/0_stateless/01582_any_join_supertype.sql | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/queries/0_stateless/01582_any_join_supertype.reference b/tests/queries/0_stateless/01582_any_join_supertype.reference index b4f45c4e403..a45cb12d3af 100644 --- a/tests/queries/0_stateless/01582_any_join_supertype.reference +++ b/tests/queries/0_stateless/01582_any_join_supertype.reference @@ -1,3 +1,5 @@ 1 2020-01-01 10:00:00 2020-01-01 10:00:00 +1 +2020-01-01 10:00:00 diff --git a/tests/queries/0_stateless/01582_any_join_supertype.sql b/tests/queries/0_stateless/01582_any_join_supertype.sql index 67b7d3ad087..6b06d78c83c 100644 --- a/tests/queries/0_stateless/01582_any_join_supertype.sql +++ b/tests/queries/0_stateless/01582_any_join_supertype.sql @@ -8,6 +8,7 @@ INSERT INTO foo VALUES ('2020-01-01', '2020-01-01 12:00:00', 'test1'), ('2020-01 INSERT INTO bar VALUES ('2020-01-01', 'test2'), ('2020-01-01', 'test3'); SET optimize_move_to_prewhere = 1; +SET any_join_distinct_right_table_keys = 0; SELECT count() FROM foo ANY INNER JOIN bar USING (dimension_1) @@ -21,5 +22,16 @@ WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30'); SELECT toDateTime(foo.server_time, 'UTC') FROM foo SEMI JOIN bar USING 
(dimension_1) WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30'); +SET any_join_distinct_right_table_keys = 1; + +SELECT count() +FROM foo ANY INNER JOIN bar USING (dimension_1) +WHERE (foo.server_date <= '2020-11-07') AND (toDate(foo.server_time, 'Asia/Yekaterinburg') <= '2020-11-07'); + +SELECT toDateTime(foo.server_time, 'UTC') +FROM foo +ANY INNER JOIN bar USING (dimension_1) +WHERE toDate(foo.server_time, 'UTC') <= toDate('2020-04-30'); + DROP TABLE foo; DROP TABLE bar; From d5181b984a5eabcf1ce0fec6454bfb0e0b2bd396 Mon Sep 17 00:00:00 2001 From: feng lv Date: Wed, 13 Jan 2021 08:38:19 +0000 Subject: [PATCH 43/78] fix some wrong words in comment --- src/Interpreters/ActionsDAG.h | 2 +- src/Interpreters/ExpressionActions.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index aad6d335eec..6a26927374e 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -80,7 +80,7 @@ public: }; /// Index is used to: - /// * find Node buy it's result_name + /// * find Node by it's result_name /// * specify order of columns in result /// It represents a set of available columns. /// Removing of column from index is equivalent to removing of column from final result. diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index cbd92eb57e3..1b5c48fd43e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -62,7 +62,7 @@ public: using Actions = std::vector; - /// This map helps to find input position bu it's name. + /// This map helps to find input position by it's name. /// Key is a view to input::result_name. /// Result is a list because it is allowed for inputs to have same names. 
using NameToInputMap = std::unordered_map>; From 73e96250a802b18e9c2339945369bb5239c3d154 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 13 Jan 2021 15:19:41 +0300 Subject: [PATCH 44/78] add comments --- src/Common/MemoryTracker.h | 6 +++++- src/Common/ThreadPool.cpp | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index b67f9e368e2..9a2c3a399ea 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -5,10 +5,14 @@ #include #include -#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) +#if !defined(NDEBUG) #define MEMORY_TRACKER_DEBUG_CHECKS #endif +/// DENY_ALLOCATIONS_IN_SCOPE macro makes MemoryTracker throw LOGICAL_ERROR on any allocation attempt +/// until the end of the scope. It's useful to ensure that no allocations happen in signal handlers and +/// outside of try/catch block of thread functions. ALLOW_ALLOCATIONS_IN_SCOPE cancels effect of +/// DENY_ALLOCATIONS_IN_SCOPE in the inner scope. In Release builds these macros do nothing. #ifdef MEMORY_TRACKER_DEBUG_CHECKS #include extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation; diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 4b6834bd235..92d02ae0f3e 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -224,6 +224,8 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ if (!jobs.empty()) { + /// std::priority_queue does not provide interface for getting non-const reference to an element + /// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job. 
job = std::move(const_cast(jobs.top().job)); jobs.pop(); } From 12f3b22623abe434f6a6e47347e5d9eba71df569 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 15:20:10 +0300 Subject: [PATCH 45/78] Do not skip streams after serializeBinaryBulkStatePrefix. --- src/Storages/StorageTinyLog.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index fe8a25ba13b..f5e60f06fb0 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -294,11 +294,15 @@ IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter( void TinyLogBlockOutputStream::writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams) { IDataType::SerializeBinaryBulkSettings settings; - settings.getter = createStreamGetter(name, written_streams); if (serialize_states.count(name) == 0) + { + WrittenStreams prefix_written_streams; + settings.getter = createStreamGetter(name, prefix_written_streams); type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]); + } + settings.getter = createStreamGetter(name, written_streams); type.serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]); } From 35a3bdc1a6c15b8f329c737998f357de002af729 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 15:42:30 +0300 Subject: [PATCH 46/78] Added test. 
--- .../01651_lc_insert_tiny_log.reference | 4 ++++ .../0_stateless/01651_lc_insert_tiny_log.sql | 15 +++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/01651_lc_insert_tiny_log.reference create mode 100644 tests/queries/0_stateless/01651_lc_insert_tiny_log.sql diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference new file mode 100644 index 00000000000..c4932303260 --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference @@ -0,0 +1,4 @@ +10000000 +10000000 1274991808 +20000000 +20000000 2549983616 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql new file mode 100644 index 00000000000..45c59163371 --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -0,0 +1,15 @@ +drop table if exists perf_lc_num; + +CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; From c84a8c4b165d2967994ba1b2a1945a66fb9f5754 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Jan 2021 15:47:34 +0300 Subject: [PATCH 47/78] Fix manipulators for common string types --- src/IO/Operators.h | 6 ++- src/IO/WriteHelpers.h | 29 ++++++++++++- src/IO/tests/gtest_manip.cpp | 82 ++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 src/IO/tests/gtest_manip.cpp diff --git a/src/IO/Operators.h b/src/IO/Operators.h index d1500aedd22..02a346e2f2f 100644 --- 
a/src/IO/Operators.h +++ b/src/IO/Operators.h @@ -61,7 +61,11 @@ template WriteBuffer & operator<< (QuoteManipWriteBuffer buf, template WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const T & x) { writeDoubleQuoted(x, buf.get()); return buf; } template WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const T & x) { writeBinary(x, buf.get()); return buf; } -inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeAnyEscapedString<'\''>(x, x + strlen(x), buf.get()); return buf; } +inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; } +inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const std::string_view & x) { writeEscapedString(x, buf); return buf; } +inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const StringRef & x) { writeEscapedString(x, buf); return buf; } +inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const char * x) { writeEscapedString(x, strlen(x), buf); return buf; } + inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'\''>(x, x + strlen(x), buf.get()); return buf; } inline WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'"'>(x, x + strlen(x), buf.get()); return buf; } inline WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const char * x) { writeStringBinary(x, buf.get()); return buf; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 624a6c3496a..e6acd0b8880 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -483,6 +483,10 @@ inline void writeEscapedString(const StringRef & ref, WriteBuffer & buf) writeEscapedString(ref.data, ref.size, buf); } +inline void writeEscapedString(const std::string_view & ref, WriteBuffer & buf) +{ + writeEscapedString(ref.data(), ref.size(), buf); +} template void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & 
buf) @@ -512,17 +516,31 @@ inline void writeQuotedString(const String & s, WriteBuffer & buf) writeAnyQuotedString<'\''>(s, buf); } - inline void writeQuotedString(const StringRef & ref, WriteBuffer & buf) { writeAnyQuotedString<'\''>(ref, buf); } +inline void writeQuotedString(const std::string_view & ref, WriteBuffer & buf) +{ + writeAnyQuotedString<'\''>(ref.data(), ref.data() + ref.size(), buf); +} + +inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf) +{ + writeAnyQuotedString<'"'>(s, buf); +} + inline void writeDoubleQuotedString(const StringRef & s, WriteBuffer & buf) { writeAnyQuotedString<'"'>(s, buf); } +inline void writeDoubleQuotedString(const std::string_view & s, WriteBuffer & buf) +{ + writeAnyQuotedString<'"'>(s.data(), s.data() + s.size(), buf); +} + /// Outputs a string in backquotes. inline void writeBackQuotedString(const StringRef & s, WriteBuffer & buf) { @@ -901,6 +919,7 @@ writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); } inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); } +inline void writeBinary(const std::string_view & x, WriteBuffer & buf) { writeStringBinary(x, buf); } inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } inline void writeBinary(const DummyUInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); } @@ -1001,6 +1020,10 @@ writeQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); } inline void writeQuoted(const String & x, WriteBuffer & buf) { writeQuotedString(x, buf); } +inline void writeQuoted(const std::string_view & x, WriteBuffer & buf) { writeQuotedString(x, buf); } + +inline void writeQuoted(const StringRef & x, WriteBuffer & buf) { writeQuotedString(x, buf); } + inline void writeQuoted(const LocalDate & x, 
WriteBuffer & buf) { writeChar('\'', buf); @@ -1043,6 +1066,10 @@ writeDoubleQuoted(const T & x, WriteBuffer & buf) { writeText(x, buf); } inline void writeDoubleQuoted(const String & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); } +inline void writeDoubleQuoted(const std::string_view & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); } + +inline void writeDoubleQuoted(const StringRef & x, WriteBuffer & buf) { writeDoubleQuotedString(x, buf); } + inline void writeDoubleQuoted(const LocalDate & x, WriteBuffer & buf) { writeChar('"', buf); diff --git a/src/IO/tests/gtest_manip.cpp b/src/IO/tests/gtest_manip.cpp new file mode 100644 index 00000000000..4b0f7389c6f --- /dev/null +++ b/src/IO/tests/gtest_manip.cpp @@ -0,0 +1,82 @@ +#include + +#include +#include +#include +#include +#include +#include + +using namespace DB; + +template +void checkString(const T & str, U manip, const std::string & expected) +{ + WriteBufferFromOwnString buf; + + buf << manip << str; + EXPECT_EQ(expected, buf.str()) << "str type:" << typeid(str).name(); +} + +TEST(OperatorsManipTest, EscapingTest) +{ + checkString("Hello 'world'", escape, "Hello \\'world\\'"); + checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); + + std::string s1 = "Hello 'world'"; + checkString(s1, escape, "Hello \\'world\\'"); + std::string s2 = "Hello \\world\\"; + checkString(s2, escape, "Hello \\\\world\\\\"); + + std::string_view sv1 = s1; + checkString(sv1, escape, "Hello \\'world\\'"); + std::string_view sv2 = s2; + checkString(sv2, escape, "Hello \\\\world\\\\"); + + StringRef sr1 = s1; + checkString(sr1, escape, "Hello \\'world\\'"); + StringRef sr2 = s2; + checkString(sr2, escape, "Hello \\\\world\\\\"); +} + +TEST(OperatorsManipTest, QuouteTest) +{ + checkString("Hello 'world'", quote, "'Hello \\'world\\''"); + + std::string s1 = "Hello 'world'"; + checkString(s1, quote, "'Hello \\'world\\''"); + + std::string_view sv1 = s1; + checkString(sv1, quote, "'Hello \\'world\\''"); + + 
StringRef sr1 = s1; + checkString(sr1, quote, "'Hello \\'world\\''"); +} + +TEST(OperatorsManipTest, DoubleQuouteTest) +{ + checkString("Hello 'world'", double_quote, "\"Hello 'world'\""); + + std::string s1 = "Hello 'world'"; + checkString(s1, double_quote, "\"Hello 'world'\""); + + std::string_view sv1 = s1; + checkString(sv1, double_quote, "\"Hello 'world'\""); + + StringRef sr1 = s1; + checkString(sr1, double_quote, "\"Hello 'world'\""); +} + +TEST(OperatorsManipTest, binary) +{ + checkString("Hello", binary, "\x5Hello"); + + std::string s1 = "Hello"; + checkString(s1, binary, "\x5Hello"); + + std::string_view sv1 = s1; + checkString(sv1, binary, "\x5Hello"); + + StringRef sr1 = s1; + checkString(sr1, binary, "\x5Hello"); +} From 073379627c7617a5e37f9130a6a19a77967e97f2 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 13 Jan 2021 20:47:40 +0800 Subject: [PATCH 48/78] Update FunctionsBitmap.h --- src/Functions/FunctionsBitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index cd5e23acbf3..b02ec3e83e7 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -290,7 +290,7 @@ public: if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) { throw Exception( - "The second or third argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", + "The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -428,7 +428,7 @@ public: for (size_t i = 0; i < 2; ++i) { const auto * array_type = typeid_cast(arguments[i + 1].get()); - String msg = "The second or third argument for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it 
has type " + arguments[i + 1]->getName() + "."; + String msg = "The second and third arguments for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it has type " + arguments[i + 1]->getName() + "."; if (!array_type) throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From e9a18fbd3a6881d8dbef5d7b650f91f9b12f5a01 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 13 Jan 2021 20:49:06 +0800 Subject: [PATCH 49/78] Update FunctionsBitmap.h --- src/Functions/FunctionsBitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index b02ec3e83e7..4d9621338f8 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -290,7 +290,7 @@ public: if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64())) { throw Exception( - "The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".", + "The second and third arguments for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but one of them has type " + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -428,7 +428,7 @@ public: for (size_t i = 0; i < 2; ++i) { const auto * array_type = typeid_cast(arguments[i + 1].get()); - String msg = "The second and third arguments for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but it has type " + arguments[i + 1]->getName() + "."; + String msg = "The second and third arguments for function " + getName() + " must be an one of [Array(UInt8), Array(UInt16), Array(UInt32), Array(UInt64)] but one of them has type " + arguments[i + 1]->getName() + "."; if (!array_type) throw Exception(msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From c5cebbacb1298dd022626a2c7ee52d99f70eb9b6 
Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 16:29:39 +0300 Subject: [PATCH 50/78] Update test. --- .../01651_lc_insert_tiny_log.reference | 8 +++++ .../0_stateless/01651_lc_insert_tiny_log.sql | 32 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference index c4932303260..3da44c57b27 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference @@ -2,3 +2,11 @@ 10000000 1274991808 20000000 20000000 2549983616 +10000000 +10000000 1274991808 +20000000 +20000000 2549983616 +10000000 +10000000 1274991808 +20000000 +20000000 2549983616 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index 45c59163371..22532529812 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -13,3 +13,35 @@ select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; + + +CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = Log; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; + + +CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = StripeLog; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from 
perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; + + From da0cb8d47f7732fa2c0c523418d25cdad0e885d2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 16:32:27 +0300 Subject: [PATCH 51/78] Added comment. --- src/Storages/StorageTinyLog.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index f5e60f06fb0..6e3e9c612bb 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -297,6 +297,8 @@ void TinyLogBlockOutputStream::writeData(const String & name, const IDataType & if (serialize_states.count(name) == 0) { + /// Some stream getters may be called form `serializeBinaryBulkStatePrefix`. + /// Use different WrittenStreams set, or we get nullptr for them in `serializeBinaryBulkWithMultipleStreams` WrittenStreams prefix_written_streams; settings.getter = createStreamGetter(name, prefix_written_streams); type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]); From 94c0433d9f67fbba32eb7cbf506e12f60dc1803e Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 13 Jan 2021 16:55:34 +0300 Subject: [PATCH 52/78] Update InterpreterInsertQuery.cpp --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 742c9f6736f..ab5fe4eae9f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -106,7 +106,7 @@ Block InterpreterInsertQuery::getSampleBlock( /// The table does not have a column with that name if (!table_sample.has(current_name)) - throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(), + throw Exception("No such column " + 
current_name + " in table " + table->getStorageID().getNameForLogs(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); if (!allow_materialized && !table_sample_non_materialized.has(current_name)) From ae9490883d4336c412a8465510d8453158cbf76d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jan 2021 17:10:20 +0300 Subject: [PATCH 53/78] Update test --- tests/queries/0_stateless/01591_window_functions.reference | 2 +- tests/queries/0_stateless/01591_window_functions.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index ce56860ed8b..aad784b1ac1 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -122,7 +122,7 @@ select * from (select * from numbers(5) order by rand()) order by count() over ( 2 3 4 -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number) + 1) over (order by min(number) desc) desc; -- different windows -- an explain test would also be helpful, but it's too immature now and I don't diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 082a6652a65..30094690a92 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -41,7 +41,7 @@ select * from (select * from numbers(5) order by rand()) order by count() over ( -- Aggregate functions as window function arguments. This query is semantically -- the same as the above one, only we replace `number` with -- `any(number) group by number` and so on. 
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number) + 1) over (order by min(number) desc) desc; -- different windows -- an explain test would also be helpful, but it's too immature now and I don't From ccbb11db8fa38a5165c91cb92ac88522098a7277 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jan 2021 17:11:17 +0300 Subject: [PATCH 54/78] Update test --- .../01470_columns_transformers.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/01470_columns_transformers.reference b/tests/queries/0_stateless/01470_columns_transformers.reference index a103d62167b..ae0adb3ba60 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.reference +++ b/tests/queries/0_stateless/01470_columns_transformers.reference @@ -27,9 +27,9 @@ SELECT avg(k) FROM columns_transformers SELECT - toDate(any(i)), - toDate(any(j)), - toDate(any(k)) + any(toDate(i)), + any(toDate(j)), + any(toDate(k)) FROM columns_transformers AS a SELECT length(toString(j)), @@ -44,9 +44,9 @@ SELECT avg(k) FROM columns_transformers SELECT - toDate(any(i)), - toDate(any(j)), - toDate(any(k)) + any(toDate(i)), + any(toDate(j)), + any(toDate(k)) FROM columns_transformers AS a SELECT sum(i + 1 AS i), @@ -59,9 +59,9 @@ SELECT avg(k) FROM columns_transformers SELECT - toDate(any(i)), - toDate(any(j)), - toDate(any(k)) + any(toDate(i)), + any(toDate(j)), + any(toDate(k)) FROM columns_transformers AS a SELECT (i + 1) + 1 AS i, From 5579e3876cbd12b5d2aa228b74c6e6608ff71654 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jan 2021 17:12:05 +0300 Subject: [PATCH 55/78] Update test --- tests/queries/0_stateless/01398_any_with_alias.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01398_any_with_alias.sql 
b/tests/queries/0_stateless/01398_any_with_alias.sql index 32e67f3f4b6..a65b8132c67 100644 --- a/tests/queries/0_stateless/01398_any_with_alias.sql +++ b/tests/queries/0_stateless/01398_any_with_alias.sql @@ -1,3 +1,5 @@ +SET optimize_move_functions_out_of_any = 1; + SELECT any(number * number) AS n FROM numbers(100) FORMAT CSVWithNames; EXPLAIN SYNTAX SELECT any(number * number) AS n FROM numbers(100); From 9fc7b5273208e783b83c0160035cd5b52345d08c Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 13 Jan 2021 17:41:34 +0300 Subject: [PATCH 56/78] done --- .../01622_defaults_for_file_engine.reference | 1 + .../01622_defaults_for_file_engine.sql | 7 ++++ .../01622_defaults_for_url_engine.reference | 1 + .../01622_defaults_for_url_engine.sh | 36 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 tests/queries/0_stateless/01622_defaults_for_file_engine.reference create mode 100644 tests/queries/0_stateless/01622_defaults_for_file_engine.sql create mode 100644 tests/queries/0_stateless/01622_defaults_for_url_engine.reference create mode 100755 tests/queries/0_stateless/01622_defaults_for_url_engine.sh diff --git a/tests/queries/0_stateless/01622_defaults_for_file_engine.reference b/tests/queries/0_stateless/01622_defaults_for_file_engine.reference new file mode 100644 index 00000000000..75d3f7636fd --- /dev/null +++ b/tests/queries/0_stateless/01622_defaults_for_file_engine.reference @@ -0,0 +1 @@ +1 7 diff --git a/tests/queries/0_stateless/01622_defaults_for_file_engine.sql b/tests/queries/0_stateless/01622_defaults_for_file_engine.sql new file mode 100644 index 00000000000..51392871e26 --- /dev/null +++ b/tests/queries/0_stateless/01622_defaults_for_file_engine.sql @@ -0,0 +1,7 @@ +insert into table function file("data1622.json", "TSV", "value String") VALUES ('{"a":1}'); +drop table if exists json; +create table json(a int, b int default 7) engine File(JSONEachRow, 'data1622.json'); +set input_format_defaults_for_omitted_fields = 1; 
+select * from json; +truncate table json; +drop table if exists json; diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.reference b/tests/queries/0_stateless/01622_defaults_for_url_engine.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh new file mode 100755 index 00000000000..c957786a4f7 --- /dev/null +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + + +PORT="$(($RANDOM%63000+2001))" + +TEMP_FILE="$CURDIR/01622_defaults_for_url_engine.tmp" + +function thread1 +{ + while true; do + echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; + done +} + +function thread2 +{ + for iter in {1..100}; do + $CLICKHOUSE_CLIENT -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7') format Values" + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread1; +export -f thread2; + +TIMEOUT=5 + +timeout $TIMEOUT bash -c "thread1 $PORT" > /dev/null 2>&1 & +timeout $TIMEOUT bash -c "thread2 $PORT" 2> /dev/null > $TEMP_FILE & + +wait + +grep -q '(1,7)' $TEMP_FILE && echo "Ok" \ No newline at end of file From 33d007dc46753b520cd1f070c3747f43f288b8e3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 17:45:19 +0300 Subject: [PATCH 57/78] Fix function ignore with LowCardinality argument. 
--- src/Functions/ignore.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Functions/ignore.cpp b/src/Functions/ignore.cpp index 7f78973907e..6b02c3a462d 100644 --- a/src/Functions/ignore.cpp +++ b/src/Functions/ignore.cpp @@ -30,6 +30,10 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + /// We should never return LowCardinality result, cause we declare that result is always constant zero. + /// (in getResultIfAlwaysReturnsConstantAndHasArguments) + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + String getName() const override { return name; From a0874d93c3778624e024701068d2c33d3606a1bc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jan 2021 17:47:08 +0300 Subject: [PATCH 58/78] Add test. --- .../0_stateless/01652_ignore_and_low_cardinality.reference | 0 tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference create mode 100644 tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql new file mode 100644 index 00000000000..34049a98e4f --- /dev/null +++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql @@ -0,0 +1,3 @@ +set allow_suspicious_low_cardinality_types = 1; +CREATE TABLE lc_null_int8_defnull (val LowCardinality(Nullable(Int8)) DEFAULT NULL) ENGINE = MergeTree order by tuple(); +SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_null_int8_defnull AS values; From ded35fe5b0ec81904adad9a742a70d5a5b176c4c Mon Sep 17 00:00:00 2001 From: 
Nikita Mikhailov Date: Wed, 13 Jan 2021 18:15:32 +0300 Subject: [PATCH 59/78] delete --- src/Interpreters/TableJoin.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 6622ea2dcd6..2d3bffa8234 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -230,16 +230,8 @@ void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column) void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & columns) const { for (auto & col : columns) - { - /// Materialize column. - /// Column is not empty if it is constant, but after Join all constants will be materialized. - /// So, we need remove constants from header. - // if (col.column) - // col.column = nullptr; - if (leftBecomeNullable(col.type)) col.type = makeNullable(col.type); - } for (const auto & col : columns_added_by_join) { From 3d62d6503bb8449fd1129a5b22050266b20a9ca7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 13 Jan 2021 18:34:16 +0300 Subject: [PATCH 60/78] better --- docker/test/fasttest/run.sh | 1 + tests/queries/0_stateless/01622_defaults_for_url_engine.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a918cc44420..0c8bb842126 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -329,6 +329,7 @@ function run_tests # nc - command not found 01601_proxy_protocol + 01622_defaults_for_url_engine ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index c957786a4f7..164aee5f845 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ 
b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -17,7 +18,7 @@ function thread1 function thread2 { - for iter in {1..100}; do + while true; do $CLICKHOUSE_CLIENT -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7') format Values" done } From 2683bbc0f7b9e17c83bd6effb3d88f7712a42b14 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Jan 2021 18:35:35 +0300 Subject: [PATCH 61/78] Disable lint for test --- src/IO/tests/gtest_manip.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/IO/tests/gtest_manip.cpp b/src/IO/tests/gtest_manip.cpp index 4b0f7389c6f..c3989ca0d7a 100644 --- a/src/IO/tests/gtest_manip.cpp +++ b/src/IO/tests/gtest_manip.cpp @@ -21,22 +21,22 @@ void checkString(const T & str, U manip, const std::string & expected) TEST(OperatorsManipTest, EscapingTest) { checkString("Hello 'world'", escape, "Hello \\'world\\'"); - checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); + checkString("Hello \\world\\", escape, "Hello \\\\world\\\\"); // NOLINT std::string s1 = "Hello 'world'"; checkString(s1, escape, "Hello \\'world\\'"); std::string s2 = "Hello \\world\\"; - checkString(s2, escape, "Hello \\\\world\\\\"); + checkString(s2, escape, "Hello \\\\world\\\\"); // NOLINT std::string_view sv1 = s1; checkString(sv1, escape, "Hello \\'world\\'"); std::string_view sv2 = s2; - checkString(sv2, escape, "Hello \\\\world\\\\"); + checkString(sv2, escape, "Hello \\\\world\\\\"); // NOLINT StringRef sr1 = s1; checkString(sr1, escape, "Hello \\'world\\'"); StringRef sr2 = s2; - checkString(sr2, escape, "Hello \\\\world\\\\"); + checkString(sr2, escape, "Hello \\\\world\\\\"); // NOLINT } TEST(OperatorsManipTest, QuouteTest) From ec12a089f3989ee461f0a804a892408d97292045 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: 
Wed, 13 Jan 2021 21:11:56 +0300 Subject: [PATCH 62/78] Fix return type of groupUniqArray --- tests/queries/0_stateless/01651_group_uniq_array_enum.reference | 2 ++ tests/queries/0_stateless/01651_group_uniq_array_enum.sql | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/01651_group_uniq_array_enum.reference create mode 100644 tests/queries/0_stateless/01651_group_uniq_array_enum.sql diff --git a/tests/queries/0_stateless/01651_group_uniq_array_enum.reference b/tests/queries/0_stateless/01651_group_uniq_array_enum.reference new file mode 100644 index 00000000000..2858c6f9a95 --- /dev/null +++ b/tests/queries/0_stateless/01651_group_uniq_array_enum.reference @@ -0,0 +1,2 @@ +['Hello','World','Упячка'] +['Hello','World','World','Упячка','Упячка','Упячка'] diff --git a/tests/queries/0_stateless/01651_group_uniq_array_enum.sql b/tests/queries/0_stateless/01651_group_uniq_array_enum.sql new file mode 100644 index 00000000000..b20cb17dd21 --- /dev/null +++ b/tests/queries/0_stateless/01651_group_uniq_array_enum.sql @@ -0,0 +1,2 @@ +SELECT arraySort(groupUniqArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x); +SELECT arraySort(groupArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x); From d27e04997dbd4460574e6170fe2e78167972bc39 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jan 2021 21:13:52 +0300 Subject: [PATCH 63/78] More tests --- src/AggregateFunctions/AggregateFunctionGroupArray.h | 4 +--- .../AggregateFunctionGroupUniqArray.h | 2 +- .../0_stateless/01651_group_uniq_array_enum.reference | 1 + .../0_stateless/01651_group_uniq_array_enum.sql | 11 +++++++++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index e8c4d70ea26..27a8cf0b1ee 100644 --- 
a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -112,7 +112,6 @@ class GroupArrayNumericImpl final { using Data = GroupArrayNumericData; static constexpr bool limit_num_elems = Trait::has_limit; - DataTypePtr & data_type; UInt64 max_elems; UInt64 seed; @@ -121,7 +120,6 @@ public: const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits::max(), UInt64 seed_ = 123456) : IAggregateFunctionDataHelper, GroupArrayNumericImpl>( {data_type_}, {}) - , data_type(this->argument_types[0]) , max_elems(max_elems_) , seed(seed_) { @@ -129,7 +127,7 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(data_type); } + DataTypePtr getReturnType() const override { return std::make_shared(this->argument_types[0]); } void insert(Data & a, const T & v, Arena * arena) const { diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 2ee9d0f6e1c..1dc7dcde9c3 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -56,7 +56,7 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(std::make_shared>()); + return std::make_shared(this->argument_types[0]); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/tests/queries/0_stateless/01651_group_uniq_array_enum.reference b/tests/queries/0_stateless/01651_group_uniq_array_enum.reference index 2858c6f9a95..ba4401b6afd 100644 --- a/tests/queries/0_stateless/01651_group_uniq_array_enum.reference +++ b/tests/queries/0_stateless/01651_group_uniq_array_enum.reference @@ -1,2 +1,3 @@ ['Hello','World','Упячка'] ['Hello','World','World','Упячка','Упячка','Упячка'] +['world','hello'] Array(Enum8(\'world\' = 0, \'hello\' = 1)) ['world','hello'] 
Array(Enum8(\'world\' = 0, \'hello\' = 1)) diff --git a/tests/queries/0_stateless/01651_group_uniq_array_enum.sql b/tests/queries/0_stateless/01651_group_uniq_array_enum.sql index b20cb17dd21..19de51f9681 100644 --- a/tests/queries/0_stateless/01651_group_uniq_array_enum.sql +++ b/tests/queries/0_stateless/01651_group_uniq_array_enum.sql @@ -1,2 +1,13 @@ SELECT arraySort(groupUniqArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x); SELECT arraySort(groupArray(x)) FROM (SELECT CAST(arrayJoin([1, 2, 3, 2, 3, 3]) AS Enum('Hello' = 1, 'World' = 2, 'Упячка' = 3)) AS x); + +SELECT + arraySort(groupUniqArray(val)) AS uniq, + toTypeName(uniq), + arraySort(groupArray(val)) AS arr, + toTypeName(arr) +FROM +( + SELECT CAST(number % 2, 'Enum(\'hello\' = 1, \'world\' = 0)') AS val + FROM numbers(2) +); From e4dd7d949cf1a3fedb6969452c6770f0c3b6fb12 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 13 Jan 2021 22:26:37 +0300 Subject: [PATCH 64/78] better test --- .../0_stateless/01622_defaults_for_file_engine.reference | 2 +- tests/queries/0_stateless/01622_defaults_for_file_engine.sql | 2 +- tests/queries/0_stateless/01622_defaults_for_url_engine.sh | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01622_defaults_for_file_engine.reference b/tests/queries/0_stateless/01622_defaults_for_file_engine.reference index 75d3f7636fd..f04ee88dabd 100644 --- a/tests/queries/0_stateless/01622_defaults_for_file_engine.reference +++ b/tests/queries/0_stateless/01622_defaults_for_file_engine.reference @@ -1 +1 @@ -1 7 +1 7 8 diff --git a/tests/queries/0_stateless/01622_defaults_for_file_engine.sql b/tests/queries/0_stateless/01622_defaults_for_file_engine.sql index 51392871e26..203486fe71c 100644 --- a/tests/queries/0_stateless/01622_defaults_for_file_engine.sql +++ b/tests/queries/0_stateless/01622_defaults_for_file_engine.sql @@ -1,6 +1,6 @@ insert into table function 
file("data1622.json", "TSV", "value String") VALUES ('{"a":1}'); drop table if exists json; -create table json(a int, b int default 7) engine File(JSONEachRow, 'data1622.json'); +create table json(a int, b int default 7, c default a + b) engine File(JSONEachRow, 'data1622.json'); set input_format_defaults_for_omitted_fields = 1; select * from json; truncate table json; diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index 164aee5f845..9c56f0415ff 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -19,7 +19,7 @@ function thread1 function thread2 { while true; do - $CLICKHOUSE_CLIENT -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7') format Values" + $CLICKHOUSE_CLIENT -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7, c default a + b') format Values" done } @@ -34,4 +34,4 @@ timeout $TIMEOUT bash -c "thread2 $PORT" 2> /dev/null > $TEMP_FILE & wait -grep -q '(1,7)' $TEMP_FILE && echo "Ok" \ No newline at end of file +grep -q '(1,7,8)' $TEMP_FILE && echo "Ok" \ No newline at end of file From e40f7df4d74e81de83a71b3e284618106bed9880 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Jan 2021 01:22:15 +0300 Subject: [PATCH 65/78] Update InterpreterExistsQuery.cpp --- src/Interpreters/InterpreterExistsQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index aeb5c0f9bcf..b94f29f9194 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -58,7 +58,7 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl() String database = context.resolveDatabase(exists_query->database); context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table); auto tbl = 
DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context); - result = tbl != nullptr && tbl->isView(); + result = tbl != nullptr && tbl->isView(); } else if ((exists_query = query_ptr->as())) { From 1d9a3a492a21ca83662e473466c46e5aa1051669 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 14 Jan 2021 02:38:36 +0400 Subject: [PATCH 66/78] Update 01622_defaults_for_url_engine.sh --- tests/queries/0_stateless/01622_defaults_for_url_engine.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index 9c56f0415ff..e7deace8b46 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -19,7 +19,7 @@ function thread1 function thread2 { while true; do - $CLICKHOUSE_CLIENT -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7, c default a + b') format Values" + $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7, c default a + b') format Values" done } @@ -34,4 +34,4 @@ timeout $TIMEOUT bash -c "thread2 $PORT" 2> /dev/null > $TEMP_FILE & wait -grep -q '(1,7,8)' $TEMP_FILE && echo "Ok" \ No newline at end of file +grep -q '(1,7,8)' $TEMP_FILE && echo "Ok" From 1244fed8bb721f16547dc450b5c201401dba7002 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 13 Jan 2021 19:01:17 -0400 Subject: [PATCH 67/78] Update external-dicts-dict-lifetime.md --- .../external-dictionaries/external-dicts-dict-lifetime.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 4dbf4be9f96..7b83d51e01e 100644 --- 
a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -54,10 +54,11 @@ LIFETIME(MIN 300 MAX 360) При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): > - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. -> - Для таблиц типа MyISAM, время модификации проверяется запросом `SHOW TABLE STATUS`. +> - Для таблиц типа MySQL, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`; +. > - Словари из других источников по умолчанию обновляются каждый раз. -Для источников MySQL (InnoDB), ODBC и ClickHouse можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: +Для других источников (ODBC, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: > - В таблице словаря должно быть поле, которое гарантированно изменяется при обновлении данных в источнике. > - В настройках источника указывается запрос, который получает изменяющееся поле. Результат запроса сервер ClickHouse интерпретирует как строку и если эта строка изменилась по отношению к предыдущему состоянию, то словарь обновляется. Запрос следует указывать в поле `` настроек [источника](external-dicts-dict-sources.md). 
From a4e02065f35ed01cd788e8cd4d3d653b064dd549 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 13 Jan 2021 19:06:28 -0400 Subject: [PATCH 68/78] Update external-dicts-dict-lifetime.md --- .../external-dictionaries/external-dicts-dict-lifetime.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index c1dd11b1cc6..7708152093f 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. -- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query. +- For MySQL tables, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`. - Dictionaries from other sources are updated every time by default. -For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: +For other sources (ODBC, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. 
To do this, follow these steps: - The dictionary table must have a field that always changes when the source data is updated. - The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md). From 38ce7c0c9c2c4c3b2aa6d04294d8ca8ba7686312 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 13 Jan 2021 19:08:51 -0400 Subject: [PATCH 69/78] Update external-dicts-dict-lifetime.md --- .../external-dictionaries/external-dicts-dict-lifetime.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 7b83d51e01e..3c797ab2980 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -54,8 +54,7 @@ LIFETIME(MIN 300 MAX 360) При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): > - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. -> - Для таблиц типа MySQL, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`; -. +> - Для таблиц типа MySQL, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`. 
> - Словари из других источников по умолчанию обновляются каждый раз. Для других источников (ODBC, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: From 7bfacb79dd75ae5001d87a7a809f74fd39ca35c2 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 13 Jan 2021 19:11:12 -0400 Subject: [PATCH 70/78] Update external-dicts-dict-lifetime.md --- .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 3c797ab2980..ec0fb8e0ee5 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -54,7 +54,7 @@ LIFETIME(MIN 300 MAX 360) При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): > - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. -> - Для таблиц типа MySQL, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`. +> - Для MySQL источника, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`). > - Словари из других источников по умолчанию обновляются каждый раз. Для других источников (ODBC, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз.
Чтобы это сделать необходимо выполнить следующие условия/действия: From 86e9160930dcd3c7e8a103fd179943b5630b2067 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 13 Jan 2021 19:11:29 -0400 Subject: [PATCH 71/78] Update external-dicts-dict-lifetime.md --- .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 7708152093f..20486ebbcc8 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -55,7 +55,7 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. -- For MySQL tables, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`. +- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`). - Dictionaries from other sources are updated every time by default. For other sources (ODBC, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time.
To do this, follow these steps: From f04db6648d1ccec7e99785a715093d6ad73526c1 Mon Sep 17 00:00:00 2001 From: RegulusZ <704709463@qq.com> Date: Thu, 14 Jan 2021 15:52:18 +0800 Subject: [PATCH 72/78] Update float.md Change the 32 line to correct meaning on Chinese --- docs/zh/sql-reference/data-types/float.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/data-types/float.md b/docs/zh/sql-reference/data-types/float.md index e0924eb0178..0fe6d2cf000 100644 --- a/docs/zh/sql-reference/data-types/float.md +++ b/docs/zh/sql-reference/data-types/float.md @@ -29,7 +29,7 @@ SELECT 1 - 0.9 - 当一行行阅读浮点数的时候,浮点数的结果可能不是机器最近显示的数值。 -## 南和Inf {#data_type-float-nan-inf} +## NaN和Inf {#data_type-float-nan-inf} 与标准SQL相比,ClickHouse 支持以下类别的浮点数: From 53299cc17b37fa38ace0dd5e838907629a32f197 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 14 Jan 2021 11:26:04 +0300 Subject: [PATCH 73/78] Update ColumnAliasesVisitor.cpp --- src/Interpreters/ColumnAliasesVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index 24be7be8188..dcc4c3d75d4 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -19,7 +19,7 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) { if (const auto * f = node->as()) { - /// "lambda" visit children itself. + /// "lambda" visits children itself. 
if (f->name == "lambda") return false; } From 445283c72691280a577ad495b7f60f13c5b92508 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 14 Jan 2021 11:52:19 +0300 Subject: [PATCH 74/78] Update index.md --- docs/en/sql-reference/statements/select/index.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 0c6a3449853..ed69198ed4d 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -44,7 +44,6 @@ Specifics of each optional clause are covered in separate sections, which are li - [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) - [HAVING clause](../../../sql-reference/statements/select/having.md) - [SELECT clause](#select-clause) -- [ALL clause](../../../sql-reference/statements/select/all.md) - [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) - [LIMIT clause](../../../sql-reference/statements/select/limit.md) - [UNION clause](../../../sql-reference/statements/select/union.md) From eedaf3f49cdeeff4130ec74ec63ef4ae7c8123a3 Mon Sep 17 00:00:00 2001 From: TiunovNN Date: Thu, 14 Jan 2021 13:08:07 +0300 Subject: [PATCH 75/78] Update architecture.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Некорретный перевод в разделе "Агрегатные функции" --- docs/ru/development/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index 53c007e078f..de8fba1bc4b 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -133,7 +133,7 @@ ClickHouse имеет сильную типизацию, поэтому нет ## Агрегатные функции {#aggregate-functions} -Агрегатные функции - это функции с состоянием (stateful). 
Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь один человек `UInt64` значение) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`). +Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь одна переменная типа `UInt64`) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`). Состояния распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении запроса `GROUP BY` высокой кардинальности (большим числом уникальных данных). Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами аллоцировать дополнительную память. Потому к созданию и уничтожению состояний, правильной передаче владения и порядку уничтожения следует уделять больше внимание. From 66fd7604101b51028a364645a5e1beb98aa57717 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 14 Jan 2021 13:45:57 +0300 Subject: [PATCH 76/78] Update test. 
--- .../0_stateless/01652_ignore_and_low_cardinality.reference | 1 + tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference index e69de29bb2d..573541ac970 100644 --- a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference +++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql index 34049a98e4f..b3d3ad81834 100644 --- a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql +++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql @@ -1,3 +1,6 @@ set allow_suspicious_low_cardinality_types = 1; CREATE TABLE lc_null_int8_defnull (val LowCardinality(Nullable(Int8)) DEFAULT NULL) ENGINE = MergeTree order by tuple(); SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_null_int8_defnull AS values; + + +SELECT ignore(toLowCardinality(1), toLowCardinality(2), 3); From 399650af313544b26393bd4bb2793269fc34623c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 14 Jan 2021 15:29:31 +0300 Subject: [PATCH 77/78] Update insert-into.md --- docs/ru/sql-reference/statements/insert-into.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 3cacce08685..e3cea4aecc5 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)` или синтаксис `(COLUMNS(c1, c2, c3))`. 
Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. В качестве примера рассмотрим таблицу: From ff4fe7698e468bfc381cb1c5c824895e2642d96f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 14 Jan 2021 15:29:44 +0300 Subject: [PATCH 78/78] Update insert-into.md --- docs/en/sql-reference/statements/insert-into.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index c38f8bc4641..7acf4018812 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)` or `(COLUMNS(c1, c2, c3))` syntax. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). +You can specify a list of columns to insert using the `(c1, c2, c3)` syntax.
You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). For example, consider the table: