From 4220b9002cc814db1db52f1f3921493dcae4fc45 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 00:35:40 +0800 Subject: [PATCH 01/14] Support reverse sorting key --- src/Interpreters/MutationsInterpreter.cpp | 10 ++- src/Parsers/ASTFunction.cpp | 8 +++ src/Parsers/ExpressionElementParsers.cpp | 23 +++++++ src/Parsers/ExpressionElementParsers.h | 8 +++ src/Parsers/ExpressionListParsers.cpp | 6 ++ src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 27 +++++++- .../Optimizations/optimizeReadInOrder.cpp | 10 +-- .../QueryPlan/ReadFromMergeTree.cpp | 17 ++++- src/Storages/KeyDescription.cpp | 19 +++++- src/Storages/KeyDescription.h | 3 + src/Storages/MergeTree/MergeTask.cpp | 8 ++- src/Storages/MergeTree/MergeTreeData.cpp | 38 ++++++++++- src/Storages/MergeTree/MergeTreeData.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 25 +++++-- .../MergeTree/MergeTreeDataWriter.cpp | 16 ++++- src/Storages/MergeTree/MergeTreeSettings.cpp | 1 + src/Storages/StorageInMemoryMetadata.cpp | 7 ++ src/Storages/StorageInMemoryMetadata.h | 3 + .../03257_reverse_sorting_key.reference | 68 +++++++++++++++++++ .../0_stateless/03257_reverse_sorting_key.sql | 39 +++++++++++ 21 files changed, 321 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/03257_reverse_sorting_key.reference create mode 100644 tests/queries/0_stateless/03257_reverse_sorting_key.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 0f25d5ac21c..2f8bb8c16ac 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1436,6 +1436,7 @@ size_t MutationsInterpreter::evaluateCommandsSize() std::optional MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const { Names sort_columns = metadata_snapshot->getSortingKeyColumns(); + std::vector reverse_flags = metadata_snapshot->getSortingKeyReverseFlags(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); @@ -1443,9 +1444,16 @@ std::optional MutationsInterpreter::getStorageSortDescriptionIf for (size_t i = 0; i < sort_columns_size; ++i) { if (header.has(sort_columns[i])) - sort_description.emplace_back(sort_columns[i], 1, 1); + { + if (reverse_flags[i]) + sort_description.emplace_back(sort_columns[i], -1, 1); + else + sort_description.emplace_back(sort_columns[i], 1, 1); + } else + { return {}; + } } return sort_description; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 53d44e2f325..7c26e4dde30 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -284,6 +284,14 @@ static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + /// Internal function for reversing MergeTree primary keys + if (name == "__descendingKey") + { + arguments->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " DESC" << (settings.hilite ? hilite_none : ""); + return; + } + frame.expression_list_prepend_whitespace = false; if (kind == Kind::CODEC || kind == Kind::STATISTICS || kind == Kind::BACKUP_NAME) frame.allow_operators = false; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 31efcb16f02..7fef956179e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -2162,6 +2162,29 @@ bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserStorageOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserExpression elem_p; + ParserKeyword ascending(Keyword::ASCENDING); + ParserKeyword descending(Keyword::DESCENDING); + ParserKeyword asc(Keyword::ASC); + ParserKeyword desc(Keyword::DESC); + + ASTPtr expr_elem; + if (!elem_p.parse(pos, expr_elem, expected)) + return false; + + int direction = 1; + + if (descending.ignore(pos, expected) || desc.ignore(pos, expected)) + direction = -1; + else + ascending.ignore(pos, expected) || asc.ignore(pos, expected); + + node = direction == 1 ? std::move(expr_elem) : makeASTFunction("__descendingKey", std::move(expr_elem)); + return true; +} + bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 903111f32db..467813b7aa0 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -432,6 +432,14 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/** Element of storage ORDER BY expression - same as expression element, but in addition, ASC[ENDING] | DESC[ENDING] could be specified + */ +class ParserStorageOrderByElement : public IParserBase +{ +protected: + const char * getName() const override { return "element of storage ORDER BY expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; /** Element of ORDER BY expression - same as expression element, but in addition, ASC[ENDING] | DESC[ENDING] could be specified * and optionally, NULLS LAST|FIRST diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6d4a2e57d10..5396340bc36 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -325,6 +325,12 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected return nested_parser.parse(pos, node, expected) && !node->children.empty(); } +bool ParserStorageOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, node, expected); +} + bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 6ab38416f32..0fc5875156d 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -249,6 +249,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserStorageOrderByExpressionList : public IParserBase +{ +protected: + const char * getName() const override { return "storage order by expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; class ParserOrderByExpressionList : public IParserBase { diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 76d399558be..b3b26baec6a 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -505,9 +505,13 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_sample_by(Keyword::SAMPLE_BY); ParserKeyword s_ttl(Keyword::TTL); ParserKeyword s_settings(Keyword::SETTINGS); + ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; + ParserStorageOrderByExpressionList order_list_p; + ParserStorageOrderByElement order_elem_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); ParserTTLExpressionList parser_ttl_list; ParserStringLiteral string_literal_parser; @@ -556,11 +560,32 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_by && s_order_by.ignore(pos, expected)) { - if (expression_p.parse(pos, order_by, expected)) + auto old_pos = pos; + if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "tuple") + ++pos; + + if (s_lparen.ignore(pos, expected)) + { + auto tuple_function = std::make_shared(); + tuple_function->name = "tuple"; + if (order_list_p.parse(pos, order_by, expected)) + tuple_function->arguments = std::move(order_by); + else + tuple_function->arguments = std::make_shared(); + tuple_function->children.push_back(tuple_function->arguments); + order_by = std::move(tuple_function); + s_rparen.check(pos, expected); + storage_like = true; + continue; + } + + pos = old_pos; + if (order_elem_p.parse(pos, order_by, expected)) { storage_like = true; continue; } + return false; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index e64a88de62e..25564c55255 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -360,8 +360,8 @@ InputOrderInfoPtr buildInputOrderFromSortDescription( } /// This is a result direction we will read from MergeTree - /// 1 - in order, - /// -1 - in reverse order, + /// 1 - in same order of keys, + /// -1 - in reverse order of keys, /// 0 - usual read, don't apply optimization /// /// So far, 0 means any direction is possible. It is ok for constant prefix. @@ -372,6 +372,7 @@ InputOrderInfoPtr buildInputOrderFromSortDescription( while (next_description_column < description.size() && next_sort_key < sorting_key.column_names.size()) { const auto & sorting_key_column = sorting_key.column_names[next_sort_key]; + int reverse_indicator = sorting_key.reverse_flags[next_sort_key] ? -1 : 1; const auto & sort_column_description = description[next_description_column]; /// If required order depend on collation, it cannot be matched with primary key order. @@ -405,8 +406,7 @@ InputOrderInfoPtr buildInputOrderFromSortDescription( if (sort_column_description.column_name != sorting_key_column) break; - current_direction = sort_column_description.direction; - + current_direction = sort_column_description.direction * reverse_indicator; //std::cerr << "====== (no dag) Found direct match" << std::endl; @@ -433,7 +433,7 @@ InputOrderInfoPtr buildInputOrderFromSortDescription( /// 'SELECT x, y FROM table WHERE x = 42 ORDER BY x + 1, y + 1' /// Here, 'x + 1' would be a fixed point. But it is reasonable to read-in-order. - current_direction = sort_column_description.direction; + current_direction = sort_column_description.direction * reverse_indicator; if (match.monotonicity) { current_direction *= match.monotonicity->direction; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3186df6a6b3..9bc259dd292 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -221,10 +221,15 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) /// build sort description for output stream static SortDescription getSortDescriptionForOutputHeader( - const Header & output_header, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info, bool enable_vertical_final) + const Header & output_header, + const Names & sorting_key_columns, + const int sort_direction, + InputOrderInfoPtr input_order_info, + PrewhereInfoPtr prewhere_info, + bool enable_vertical_final) { /// Updating sort description can be done after PREWHERE actions are applied to the header. - /// Aftert PREWHERE actions are applied, column names in header can differ from storage column names due to aliases + /// After PREWHERE actions are applied, column names in header can differ from storage column names due to aliases /// To mitigate it, we're trying to build original header and use it to deduce sorting description /// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization Block original_header = output_header.cloneEmpty(); @@ -1386,6 +1391,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( continue; Names sort_columns = storage_snapshot->metadata->getSortingKeyColumns(); + std::vector reverse_flags = storage_snapshot->metadata->getSortingKeyReverseFlags(); SortDescription sort_description; sort_description.compile_sort_description = settings[Setting::compile_sort_description]; sort_description.min_count_to_compile_sort_description = settings[Setting::min_count_to_compile_sort_description]; @@ -1396,7 +1402,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( Names partition_key_columns = storage_snapshot->metadata->getPartitionKey().column_names; for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(sort_columns[i], 1, 1); + { + if (reverse_flags[i]) + sort_description.emplace_back(sort_columns[i], -1, 1); + else + sort_description.emplace_back(sort_columns[i], 1, 1); + } for (auto & pipe : pipes) addMergingFinal( diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 7e43966556e..1b7549f83ca 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -27,6 +27,7 @@ KeyDescription::KeyDescription(const KeyDescription & other) , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr) , sample_block(other.sample_block) , column_names(other.column_names) + , reverse_flags(other.reverse_flags) , data_types(other.data_types) , additional_column(other.additional_column) { @@ -57,6 +58,7 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) sample_block = other.sample_block; column_names = other.column_names; + reverse_flags = other.reverse_flags; data_types = other.data_types; /// additional_column is constant property It should never be lost. @@ -132,11 +134,26 @@ KeyDescription KeyDescription::getSortingKeyFromAST( } const auto & children = result.expression_list_ast->children; + ASTPtr expr = std::make_shared(); for (const auto & child : children) + { + if (auto * func = child->as()) + { + if (func->name == "__descendingKey") + { + auto & key = func->arguments->children.front(); + result.column_names.emplace_back(key->getColumnName()); + result.reverse_flags.emplace_back(true); + expr->children.push_back(key->clone()); + continue; + } + } result.column_names.emplace_back(child->getColumnName()); + result.reverse_flags.emplace_back(false); + expr->children.push_back(child->clone()); + } { - auto expr = result.expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(expr, columns.getAllPhysical()); /// In expression we also need to store source columns result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(false); diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index 527a36124aa..4fc4b79b807 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -27,6 +27,9 @@ struct KeyDescription /// Column names in key definition, example: x, toStartOfMonth(date), a * b. Names column_names; + /// Indicator of key column being sorted reversely, example: x DESC, y -> {1, 0}. + std::vector reverse_flags; + /// Types from sample block ordered in columns order. DataTypes data_types; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e3ace824115..02365009bcd 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1680,6 +1680,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const /// Merge { Names sort_columns = global_ctx->metadata_snapshot->getSortingKeyColumns(); + std::vector reverse_flags = global_ctx->metadata_snapshot->getSortingKeyReverseFlags(); sort_description.compile_sort_description = global_ctx->data->getContext()->getSettingsRef()[Setting::compile_sort_description]; sort_description.min_count_to_compile_sort_description = global_ctx->data->getContext()->getSettingsRef()[Setting::min_count_to_compile_sort_description]; @@ -1689,7 +1690,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const Names partition_key_columns = global_ctx->metadata_snapshot->getPartitionKey().column_names; for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(sort_columns[i], 1, 1); + { + if (reverse_flags[i]) + sort_description.emplace_back(sort_columns[i], -1, 1); + else + sort_description.emplace_back(sort_columns[i], 1, 1); + } const bool is_vertical_merge = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical); /// If merge is vertical we cannot calculate it diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8611681a976..a13abb19cca 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -181,6 +181,7 @@ namespace Setting namespace MergeTreeSetting { + extern const MergeTreeSettingsBool allow_experimental_reverse_key; extern const MergeTreeSettingsBool allow_nullable_key; extern const MergeTreeSettingsBool allow_remote_fs_zero_copy_replication; extern const MergeTreeSettingsBool allow_suspicious_indices; @@ -450,6 +451,7 @@ MergeTreeData::MergeTreeData( bool sanity_checks = mode <= LoadingStrictnessLevel::CREATE; allow_nullable_key = !sanity_checks || (*settings)[MergeTreeSetting::allow_nullable_key]; + allow_reverse_key = !sanity_checks || (*settings)[MergeTreeSetting::allow_experimental_reverse_key]; /// Check sanity of MergeTreeSettings. Only when table is created. if (sanity_checks) @@ -651,12 +653,28 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, + bool allow_reverse_sorting_key, bool allow_nullable_key_, ContextPtr local_context) const { if (!new_metadata.sorting_key.definition_ast && !allow_empty_sorting_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); + if (!allow_reverse_sorting_key) + { + size_t num_sorting_keys = new_metadata.sorting_key.column_names.size(); + for (size_t i = 0; i < num_sorting_keys; ++i) + { + if (new_metadata.sorting_key.reverse_flags[i]) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Sorting key {} is reversed, but merge tree setting `allow_experimental_reverse_key` is disabled", + new_metadata.sorting_key.column_names[i]); + } + } + } + KeyDescription new_sorting_key = new_metadata.sorting_key; KeyDescription new_primary_key = new_metadata.primary_key; @@ -785,7 +803,14 @@ void MergeTreeData::checkProperties( /// We cannot alter a projection so far. So here we do not try to find a projection in old metadata. bool is_aggregate = projection.type == ProjectionDescription::Type::Aggregate; - checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, true /* allow_nullable_key */, local_context); + checkProperties( + *projection.metadata, + *projection.metadata, + attach, + is_aggregate, + allow_reverse_key, + true /* allow_nullable_key */, + local_context); projections_names.insert(projection.name); } } @@ -805,7 +830,14 @@ void MergeTreeData::setProperties( bool attach, ContextPtr local_context) { - checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context); + checkProperties( + new_metadata, + old_metadata, + attach, + false, + allow_reverse_key, + allow_nullable_key, + local_context); setInMemoryMetadata(new_metadata); setVirtuals(createVirtuals(new_metadata)); } @@ -3632,7 +3664,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } checkColumnFilenamesForCollision(new_metadata, /*throw_on_error=*/ true); - checkProperties(new_metadata, old_metadata, false, false, allow_nullable_key, local_context); + checkProperties(new_metadata, old_metadata, false, false, allow_reverse_key, allow_nullable_key, local_context); checkTTLExpressions(new_metadata, old_metadata); if (!columns_to_check_conversion.empty()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 7a9730e8627..b7b65714af1 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1330,6 +1330,7 @@ protected: const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, + bool allow_reverse_sorting_key, bool allow_nullable_key_, ContextPtr local_context) const; @@ -1710,7 +1711,8 @@ private: virtual void startBackgroundMovesIfNeeded() = 0; - bool allow_nullable_key{}; + bool allow_nullable_key = false; + bool allow_reverse_key = false; void addPartContributionToDataVolume(const DataPartPtr & part); void removePartContributionToDataVolume(const DataPartPtr & part); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d7305045a56..4d847e768c6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1083,14 +1083,21 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( const auto & primary_key = metadata_snapshot->getPrimaryKey(); auto index_columns = std::make_shared(); + std::vector reverse_flags; const auto & key_indices = key_condition.getKeyIndices(); DataTypes key_types; for (size_t i : key_indices) { if (i < index->size()) + { index_columns->emplace_back(index->at(i), primary_key.data_types[i], primary_key.column_names[i]); + reverse_flags.push_back(primary_key.reverse_flags[i]); + } else + { index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it. + reverse_flags.push_back(false); + } key_types.emplace_back(primary_key.data_types[i]); } @@ -1138,28 +1145,32 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( { for (size_t i = 0; i < used_key_size; ++i) { + auto & left = reverse_flags[i] ? index_right[i] : index_left[i]; + auto & right = reverse_flags[i] ? index_left[i] : index_right[i]; if ((*index_columns)[i].column) - create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.begin, i, left); else - index_left[i] = NEGATIVE_INFINITY; + left = NEGATIVE_INFINITY; - index_right[i] = POSITIVE_INFINITY; + right = POSITIVE_INFINITY; } } else { for (size_t i = 0; i < used_key_size; ++i) { + auto & left = reverse_flags[i] ? index_right[i] : index_left[i]; + auto & right = reverse_flags[i] ? index_left[i] : index_right[i]; if ((*index_columns)[i].column) { - create_field_ref(range.begin, i, index_left[i]); - create_field_ref(range.end, i, index_right[i]); + create_field_ref(range.begin, i, left); + create_field_ref(range.end, i, right); } else { /// If the PK column was not loaded in memory - exclude it from the analysis. - index_left[i] = NEGATIVE_INFINITY; - index_right[i] = POSITIVE_INFINITY; + left = NEGATIVE_INFINITY; + right = POSITIVE_INFINITY; } } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 67fef759ed4..56e868836d8 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -517,12 +517,18 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot, indices)->execute(block); Names sort_columns = metadata_snapshot->getSortingKeyColumns(); + std::vector reverse_flags = metadata_snapshot->getSortingKeyReverseFlags(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(sort_columns[i], 1, 1); + { + if (reverse_flags[i]) + sort_description.emplace_back(sort_columns[i], -1, 1); + else + sort_description.emplace_back(sort_columns[i], 1, 1); + } ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks); @@ -785,12 +791,18 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot, {})->execute(block); Names sort_columns = metadata_snapshot->getSortingKeyColumns(); + std::vector reverse_flags = metadata_snapshot->getSortingKeyReverseFlags(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(sort_columns[i], 1, 1); + { + if (reverse_flags[i]) + sort_description.emplace_back(sort_columns[i], -1, 1); + else + sort_description.emplace_back(sort_columns[i], 1, 1); + } ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocks); diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 8c6aafe48f2..930c3d5f586 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -194,6 +194,7 @@ namespace ErrorCodes DECLARE(String, storage_policy, "default", "Name of storage disk policy", 0) \ DECLARE(String, disk, "", "Name of storage disk. Can be specified instead of storage policy.", 0) \ DECLARE(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ + DECLARE(Bool, allow_experimental_reverse_key, false, "Allow descending sorting key in MergeTree tables (experimental feature).", 0) \ DECLARE(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm.", 0) \ DECLARE(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ DECLARE(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \ diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 4c74c8f56d1..d9cfdf68999 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -513,6 +513,13 @@ Names StorageInMemoryMetadata::getSortingKeyColumns() const return {}; } +std::vector StorageInMemoryMetadata::getSortingKeyReverseFlags() const +{ + if (hasSortingKey()) + return sorting_key.reverse_flags; + return {}; +} + const KeyDescription & StorageInMemoryMetadata::getSamplingKey() const { return sampling_key; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 64ae499ec6e..d9ce3abcf49 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -222,6 +222,9 @@ struct StorageInMemoryMetadata /// Returns columns names in sorting key specified by user in ORDER BY /// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc. Names getSortingKeyColumns() const; + /// Returns reverse indicators of columns in sorting key specified by user in ORDER BY + /// expression. For example: ('a' DESC, 'x * y', 'toStartOfMonth(date)' DESC) -> {1, 0, 1}. + std::vector getSortingKeyReverseFlags() const; /// Returns column names that need to be read for FINAL to work. Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.reference b/tests/queries/0_stateless/03257_reverse_sorting_key.reference new file mode 100644 index 00000000000..5371ee21f44 --- /dev/null +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.reference @@ -0,0 +1,68 @@ +3 +8 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + MergeSortingTransform + LimitsCheckingTransform + PartialSortingTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 +99 +98 +97 +96 +95 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + (Expression) + ExpressionTransform + (ReadFromMergeTree) + ReverseTransform + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 +0 +1 +2 +3 +4 +3 1003 +6 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + FinishSortingTransform + PartialSortingTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 +0 1090 +0 1080 +0 1070 +0 1060 +0 1050 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + FinishSortingTransform + PartialSortingTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 +0 1000 +0 1010 +0 1020 +0 1030 +0 1040 diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql new file mode 100644 index 00000000000..caef4ccccb0 --- /dev/null +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -0,0 +1,39 @@ +drop table if exists x1; + +drop table if exists x2; + +create table x1 (i Nullable(int)) engine MergeTree order by i desc settings allow_nullable_key = 1, index_granularity = 2, allow_experimental_reverse_key = 1; + +insert into x1 select * from numbers(100); + +select * from x1 where i = 3; + +select count() from x1 where i between 3 and 10; + +explain pipeline select * from x1 order by i desc limit 5; + +select * from x1 order by i desc limit 5; + +explain pipeline select * from x1 order by i limit 5; + +select * from x1 order by i limit 5; + +create table x2 (i Nullable(int), j Nullable(int)) engine MergeTree order by (i, j desc) settings allow_nullable_key = 1, index_granularity = 2, allow_experimental_reverse_key = 1; + +insert into x2 select number % 10, number + 1000 from numbers(100); + +select * from x2 where j = 1003; + +select count() from x2 where i between 3 and 10 and j between 1003 and 1008; + +explain pipeline select * from x2 order by i, j desc limit 5; + +select * from x2 order by i, j desc limit 5; + +explain pipeline select * from x2 order by i, j limit 5; + +select * from x2 order by i, j limit 5; + +drop table x1; + +drop table x2; From a5c2c8ba4f0c0fb5792ebaacf13d1739b2f16733 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 09:21:22 +0800 Subject: [PATCH 02/14] Fix parsing --- src/Parsers/ASTFunction.cpp | 2 +- src/Parsers/ParserCreateQuery.cpp | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 7c26e4dde30..ae4ba0cc336 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -284,7 +284,7 @@ static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// Internal function for reversing MergeTree primary keys + /// Internal function for reversing MergeTree sorting keys if (name == "__descendingKey") { arguments->formatImpl(settings, state, frame); diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index b3b26baec6a..0b84cf6548c 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -560,7 +560,20 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_by && s_order_by.ignore(pos, expected)) { - auto old_pos = pos; + if (expression_p.parse(pos, order_by, expected)) + { + storage_like = true; + continue; + } + + /// Check possible ASC|DESC suffix for single key + if (order_elem_p.parse(pos, order_by, expected)) + { + storage_like = true; + continue; + } + + /// Check possible ASC|DESC suffix for a list of keys if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "tuple") ++pos; @@ -579,13 +592,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) continue; } - pos = old_pos; - if (order_elem_p.parse(pos, order_by, expected)) - { - storage_like = true; - continue; - } - return false; } From e83fd1eb576e9ed7b4b402231d77ff013527407d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 09:23:31 +0800 Subject: [PATCH 03/14] Add doc --- .../settings/merge-tree-settings.md | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 45c4cdf9458..aa0fd4b4bb1 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1081,7 +1081,7 @@ Default value: 0 bytes. Note that if both `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` are specified, ClickHouse will count on the value that will allow to perform inserts on a bigger amount of free memory. -## min_free_disk_ratio_to_perform_insert +## min_free_disk_ratio_to_perform_insert The minimum free to total disk space ratio to perform an `INSERT`. Must be a floating point value between 0 and 1. Note that this setting: - takes into account the `keep_free_space_bytes` setting. @@ -1095,3 +1095,28 @@ Possible values: Default value: 0.0 Note that if both `min_free_disk_ratio_to_perform_insert` and `min_free_disk_bytes_to_perform_insert` are specified, ClickHouse will count on the value that will allow to perform inserts on a bigger amount of free memory. + +## allow_experimental_reverse_key + +Enables support for descending sort order in MergeTree sorting keys. This setting is particularly useful for time series analysis and Top-N queries, allowing data to be stored in reverse chronological order to optimize query performance. + +With `allow_experimental_reverse_key` enabled, you can define descending sort orders within the `ORDER BY` clause of a MergeTree table. This enables the use of more efficient `ReadInOrder` optimizations instead of `ReadInReverseOrder` for descending queries. + +**Example** + +```sql +CREATE TABLE example +( + time DateTime, + key Int32, + value String +) ENGINE = MergeTree +ORDER BY (time DESC, key) -- Descending order on 'time' field +SETTINGS allow_experimental_reverse_key = 1; + +SELECT * FROM example WHERE key = 'xxx' ORDER BY time DESC LIMIT 10; +``` + +By using `ORDER BY time DESC` in the query, `ReadInOrder` is applied. + +**Default Value:** false From f2046602083db07ecf4b35685937470202017ab6 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 09:44:11 +0800 Subject: [PATCH 04/14] Also normalize for min_max projection --- src/Storages/ProjectionsDescription.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 9654b4ef37a..cf9ff121966 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -205,8 +205,15 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( } if (!primary_key_asts.empty()) { - select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); - select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); + ASTPtr first_key = primary_key_asts.front(); + if (auto * func = first_key->as()) + { + if (func->name == "__descendingKey") + first_key = func->arguments->children.front(); + } + + select_expression_list->children.push_back(makeASTFunction("min", first_key->clone())); + select_expression_list->children.push_back(makeASTFunction("max", first_key->clone())); } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); From 58fc2987ca973a8727b7faf9e1ddde6a766f5a58 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 10:07:12 +0800 Subject: [PATCH 05/14] Different implementation --- src/Interpreters/ActionsVisitor.cpp | 7 +++++++ src/Interpreters/ExpressionAnalyzer.cpp | 8 +++++++- src/Parsers/ParserCreateQuery.cpp | 6 ------ src/Planner/PlannerActionsVisitor.cpp | 5 +++++ src/Storages/KeyDescription.cpp | 7 ++----- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 65c3fe8cfcf..f91d2a6e7d8 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -882,6 +882,13 @@ void checkFunctionHasEmptyNullsAction(const ASTFunction & node) void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data) { + /// Special function __descendingKey + if (node.name == "__descendingKey") + { + visit(node.arguments->children.front(), data); + return; + } + auto column_name = ast->getColumnName(); if (data.hasColumn(column_name)) return; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 4e5cf7d2549..8b2f24e7579 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1788,8 +1788,14 @@ ActionsDAG ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unuse else asts = ASTs(1, query); - for (const auto & ast : asts) + for (auto ast : asts) { + if (auto * func = ast->as()) + { + if (func->name == "__descendingKey") + ast = func->arguments->children.front(); + } + std::string name = ast->getColumnName(); std::string alias; if (add_aliases) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 0b84cf6548c..a434f6631dd 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -560,12 +560,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_by && s_order_by.ignore(pos, expected)) { - if (expression_p.parse(pos, order_by, expected)) - { - storage_like = true; - continue; - } - /// Check possible ASC|DESC suffix for single key if (order_elem_p.parse(pos, order_by, expected)) { diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index aea304e0ecc..008532c0c77 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -923,6 +923,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) { const auto & function_node = node->as(); + + /// Special function __descendingKey + if (function_node.getFunctionName() == "__descendingKey") + return visitFunction(function_node.getChildren().front()); + if (function_node.getFunctionName() == "indexHint") return visitIndexHintFunction(node); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 1b7549f83ca..aa9ee7313a4 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -134,26 +134,23 @@ KeyDescription KeyDescription::getSortingKeyFromAST( } const auto & children = result.expression_list_ast->children; - ASTPtr expr = std::make_shared(); for (const auto & child : children) { if (auto * func = child->as()) { if (func->name == "__descendingKey") { - auto & key = func->arguments->children.front(); - result.column_names.emplace_back(key->getColumnName()); + result.column_names.emplace_back(func->arguments->children.front()->getColumnName()); result.reverse_flags.emplace_back(true); - expr->children.push_back(key->clone()); continue; } } result.column_names.emplace_back(child->getColumnName()); result.reverse_flags.emplace_back(false); - expr->children.push_back(child->clone()); } { + auto expr = result.expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(expr, columns.getAllPhysical()); /// In expression we also need to store source columns result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(false); From fb8300e1510d2f9f9f8b9619d080ba355b7e7ba8 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Oct 2024 16:00:34 +0800 Subject: [PATCH 06/14] Fix test --- tests/queries/0_stateless/03257_reverse_sorting_key.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index caef4ccccb0..ab177197446 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -1,3 +1,5 @@ +set optimize_read_in_order = 1; + drop table if exists x1; drop table if exists x2; From b9a2f683ba051ab0ad46bde9979e0aa30961e5ba Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 29 Oct 2024 10:38:59 +0800 Subject: [PATCH 07/14] Reimplemented with more tests --- src/Interpreters/ActionsVisitor.cpp | 7 -- src/Interpreters/ExpressionAnalyzer.cpp | 8 +-- src/Parsers/ParserCreateQuery.cpp | 66 +++++++++++-------- src/Parsers/ParserCreateQuery.h | 6 ++ src/Parsers/ParserProjectionSelectQuery.cpp | 4 +- src/Planner/PlannerActionsVisitor.cpp | 4 -- src/Storages/KeyDescription.cpp | 24 +++++-- src/Storages/KeyDescription.h | 5 +- .../ReplicatedMergeTreeTableMetadata.cpp | 12 ++-- src/Storages/ProjectionsDescription.cpp | 11 +--- src/Storages/StorageKeeperMap.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- ...57_reverse_sorting_key_zookeeper.reference | 2 + .../03257_reverse_sorting_key_zookeeper.sql | 16 +++++ 14 files changed, 99 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.reference create mode 100644 tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index f91d2a6e7d8..65c3fe8cfcf 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -882,13 +882,6 @@ void checkFunctionHasEmptyNullsAction(const ASTFunction & node) void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data) { - /// Special function __descendingKey - if (node.name == "__descendingKey") - { - visit(node.arguments->children.front(), data); - return; - } - auto column_name = ast->getColumnName(); if (data.hasColumn(column_name)) return; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8b2f24e7579..4e5cf7d2549 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1788,14 +1788,8 @@ ActionsDAG ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unuse else asts = ASTs(1, query); - for (auto ast : asts) + for (const auto & ast : asts) { - if (auto * func = ast->as()) - { - if (func->name == "__descendingKey") - ast = func->arguments->children.front(); - } - std::string name = ast->getColumnName(); std::string alias; if (add_aliases) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index a434f6631dd..56f661eb46e 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -494,6 +494,44 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E return true; } +bool ParserStorageOrderByClause::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserStorageOrderByExpressionList order_list_p; + ParserStorageOrderByElement order_elem_p; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken s_rparen(TokenType::ClosingRoundBracket); + + ASTPtr order_by; + + /// Check possible ASC|DESC suffix for single key + if (order_elem_p.parse(pos, order_by, expected)) + { + node = order_by; + return true; + } + + /// Check possible ASC|DESC suffix for a list of keys + if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "tuple") + ++pos; + + if (s_lparen.ignore(pos, expected)) + { + auto tuple_function = std::make_shared(); + tuple_function->name = "tuple"; + if (order_list_p.parse(pos, order_by, expected)) + tuple_function->arguments = std::move(order_by); + else + tuple_function->arguments = std::make_shared(); + tuple_function->children.push_back(tuple_function->arguments); + order_by = std::move(tuple_function); + s_rparen.check(pos, expected); + + node = order_by; + return true; + } + + return false; +} bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -505,13 +543,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_sample_by(Keyword::SAMPLE_BY); ParserKeyword s_ttl(Keyword::TTL); ParserKeyword s_settings(Keyword::SETTINGS); - ParserToken s_lparen(TokenType::OpeningRoundBracket); - ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; - ParserStorageOrderByExpressionList order_list_p; - ParserStorageOrderByElement order_elem_p; + ParserStorageOrderByClause order_by_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); ParserTTLExpressionList parser_ttl_list; ParserStringLiteral string_literal_parser; @@ -560,32 +595,11 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_by && s_order_by.ignore(pos, expected)) { - /// Check possible ASC|DESC suffix for single key - if (order_elem_p.parse(pos, order_by, expected)) + if (order_by_p.parse(pos, order_by, expected)) { storage_like = true; continue; } - - /// Check possible ASC|DESC suffix for a list of keys - if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "tuple") - ++pos; - - if (s_lparen.ignore(pos, expected)) - { - auto tuple_function = std::make_shared(); - tuple_function->name = "tuple"; - if (order_list_p.parse(pos, order_by, expected)) - tuple_function->arguments = std::move(order_by); - else - tuple_function->arguments = std::make_shared(); - tuple_function->children.push_back(tuple_function->arguments); - order_by = std::move(tuple_function); - s_rparen.check(pos, expected); - storage_like = true; - continue; - } - return false; } diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 82da2e7ea0b..76e20ca107b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -88,6 +88,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserStorageOrderByClause : public IParserBase +{ +protected: + const char * getName() const override { return "storage order by clause"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; template class IParserColumnDeclaration : public IParserBase diff --git a/src/Parsers/ParserProjectionSelectQuery.cpp b/src/Parsers/ParserProjectionSelectQuery.cpp index 9ddd536f847..cd29fcfddb3 100644 --- a/src/Parsers/ParserProjectionSelectQuery.cpp +++ b/src/Parsers/ParserProjectionSelectQuery.cpp @@ -22,7 +22,7 @@ bool ParserProjectionSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserNotEmptyExpressionList exp_list_for_with_clause(false); ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Allows aliases without AS keyword. - ParserExpression order_expression_p; + ParserStorageOrderByExpressionList order_list_p; ASTPtr with_expression_list; ASTPtr select_expression_list; @@ -59,7 +59,7 @@ bool ParserProjectionSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (s_order_by.ignore(pos, expected)) { ASTPtr expr_list; - if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma)).parse(pos, expr_list, expected)) + if (!order_list_p.parse(pos, expr_list, expected)) return false; if (expr_list->children.size() == 1) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 008532c0c77..09e5db2ce88 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -924,10 +924,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi { const auto & function_node = node->as(); - /// Special function __descendingKey - if (function_node.getFunctionName() == "__descendingKey") - return visitFunction(function_node.getChildren().front()); - if (function_node.getFunctionName() == "indexHint") return visitIndexHintFunction(node); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index aa9ee7313a4..ca00ad8c936 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include @@ -24,6 +24,7 @@ namespace ErrorCodes KeyDescription::KeyDescription(const KeyDescription & other) : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) + , original_expression_list_ast(other.original_expression_list_ast ? other.original_expression_list_ast->clone() : nullptr) , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr) , sample_block(other.sample_block) , column_names(other.column_names) @@ -45,12 +46,16 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) else definition_ast.reset(); + if (other.original_expression_list_ast) + original_expression_list_ast = other.original_expression_list_ast->clone(); + else + original_expression_list_ast.reset(); + if (other.expression_list_ast) expression_list_ast = other.expression_list_ast->clone(); else expression_list_ast.reset(); - if (other.expression) expression = other.expression->clone(); else @@ -124,29 +129,33 @@ KeyDescription KeyDescription::getSortingKeyFromAST( { KeyDescription result; result.definition_ast = definition_ast; - result.expression_list_ast = extractKeyExpressionList(definition_ast); + result.original_expression_list_ast = extractKeyExpressionList(definition_ast); if (additional_column) { result.additional_column = additional_column; ASTPtr column_identifier = std::make_shared(*additional_column); - result.expression_list_ast->children.push_back(column_identifier); + result.original_expression_list_ast->children.push_back(column_identifier); } - const auto & children = result.expression_list_ast->children; + result.expression_list_ast = std::make_shared(); + const auto & children = result.original_expression_list_ast->children; for (const auto & child : children) { if (auto * func = child->as()) { if (func->name == "__descendingKey") { - result.column_names.emplace_back(func->arguments->children.front()->getColumnName()); + auto & real_key = func->arguments->children.front(); + result.column_names.emplace_back(real_key->getColumnName()); result.reverse_flags.emplace_back(true); + result.expression_list_ast->children.push_back(real_key->clone()); continue; } } result.column_names.emplace_back(child->getColumnName()); result.reverse_flags.emplace_back(false); + result.expression_list_ast->children.push_back(child->clone()); } { @@ -173,6 +182,7 @@ KeyDescription KeyDescription::getSortingKeyFromAST( KeyDescription KeyDescription::buildEmptyKey() { KeyDescription result; + result.original_expression_list_ast = std::make_shared(); result.expression_list_ast = std::make_shared(); result.expression = std::make_shared(ActionsDAG(), ExpressionActionsSettings{}); return result; @@ -184,7 +194,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio if (str.empty()) return result; - ParserExpression parser; + ParserStorageOrderByClause parser; ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer::visit(ast.get()); diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index 4fc4b79b807..d3f8b3fad2b 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -14,7 +14,10 @@ struct KeyDescription /// primary key in merge tree can be part of sorting key) ASTPtr definition_ast; - /// ASTExpressionList with key fields, example: (x, toStartOfMonth(date))). + /// Original user defined ASTExpressionList with key fields, example: (x DESC, toStartOfMonth(date))). + ASTPtr original_expression_list_ast; + + /// Same as above but without special function __descendingKey, a.k.a, ASC|DESC suffix. ASTPtr expression_list_ast; /// Expression from expression_list_ast created by ExpressionAnalyzer. Useful, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 10185115da4..9e44a9479d9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -78,14 +78,14 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - primary_key = formattedASTNormalized(metadata_snapshot->getPrimaryKey().expression_list_ast); + primary_key = formattedASTNormalized(metadata_snapshot->getPrimaryKey().original_expression_list_ast); if (metadata_snapshot->isPrimaryKeyDefined()) { - /// We don't use preparsed AST `sorting_key.expression_list_ast` because + /// We don't use preparsed AST `sorting_key.original_expression_list_ast` because /// it contain version column for VersionedCollapsingMergeTree, which /// is not stored in ZooKeeper for compatibility reasons. So the best /// compatible way is just to convert definition_ast to list and - /// serialize it. In all other places key.expression_list_ast should be + /// serialize it. In all other places key.original_expression_list_ast should be /// used. sorting_key = formattedASTNormalized(extractKeyExpressionList(metadata_snapshot->getSortingKey().definition_ast)); } @@ -93,7 +93,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr data_format_version = data.format_version; if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - partition_key = formattedASTNormalized(metadata_snapshot->getPartitionKey().expression_list_ast); + partition_key = formattedASTNormalized(metadata_snapshot->getPartitionKey().original_expression_list_ast); ttl_table = formattedASTNormalized(metadata_snapshot->getTableTTLs().definition_ast); @@ -301,7 +301,7 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. - String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); + String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).original_expression_list_ast); if (primary_key != parsed_zk_primary_key) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in primary key. " "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", @@ -313,7 +313,7 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.data_format_version.toUnderType()), DB::toString(data_format_version.toUnderType())); - String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast); + String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).original_expression_list_ast); if (partition_key != parsed_zk_partition_key) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in partition key expression. " diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index cf9ff121966..9654b4ef37a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -205,15 +205,8 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( } if (!primary_key_asts.empty()) { - ASTPtr first_key = primary_key_asts.front(); - if (auto * func = first_key->as()) - { - if (func->name == "__descendingKey") - first_key = func->arguments->children.front(); - } - - select_expression_list->children.push_back(makeASTFunction("min", first_key->clone())); - select_expression_list->children.push_back(makeASTFunction("max", first_key->clone())); + select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); + select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 316eced1ed6..de950a939ea 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -375,7 +375,7 @@ StorageKeeperMap::StorageKeeperMap( WriteBufferFromOwnString out; out << "KeeperMap metadata format version: 1\n" << "columns: " << metadata.columns.toString() - << "primary key: " << formattedAST(metadata.getPrimaryKey().expression_list_ast) << "\n"; + << "primary key: " << formattedAST(metadata.getPrimaryKey().original_expression_list_ast) << "\n"; metadata_string = out.str(); if (zk_root_path.empty()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 850623157a1..406e01aebb3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6254,9 +6254,9 @@ void StorageReplicatedMergeTree::alter( if (ast_to_str(future_metadata.sorting_key.definition_ast) != ast_to_str(current_metadata->sorting_key.definition_ast)) { /// We serialize definition_ast as list, because code which apply ALTER (setTableStructure) expect serialized non empty expression - /// list here and we cannot change this representation for compatibility. Also we have preparsed AST `sorting_key.expression_list_ast` + /// list here and we cannot change this representation for compatibility. Also we have preparsed AST `sorting_key.original_expression_list_ast` /// in KeyDescription, but it contain version column for VersionedCollapsingMergeTree, which shouldn't be defined as a part of key definition AST. - /// So the best compatible way is just to convert definition_ast to list and serialize it. In all other places key.expression_list_ast should be used. + /// So the best compatible way is just to convert definition_ast to list and serialize it. In all other places key.original_expression_list_ast should be used. future_metadata_in_zk.sorting_key = serializeAST(*extractKeyExpressionList(future_metadata.sorting_key.definition_ast)); } diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.reference b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.reference new file mode 100644 index 00000000000..2f834350bf3 --- /dev/null +++ b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.reference @@ -0,0 +1,2 @@ +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 2\nmode: 0\nsign column: \nprimary key: i DESC\ndata format version: 1\npartition key: \ngranularity bytes: 10000\nmerge parameters format version: 2\n +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 2\nmode: 0\nsign column: \nprimary key: i, j DESC\ndata format version: 1\npartition key: \ngranularity bytes: 10000\nmerge parameters format version: 2\n diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql new file mode 100644 index 00000000000..aa7a0325f25 --- /dev/null +++ b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql @@ -0,0 +1,16 @@ +-- Tags: zookeeper + +drop table if exists x1; +drop table if exists x2; + +create table x1 (i Nullable(int)) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x1', 'r1') order by i desc settings allow_nullable_key = 1, index_granularity = 2, index_granularity_bytes = 10000, allow_experimental_reverse_key = 1; + +create table x2 (i Nullable(int), j Nullable(int)) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x2', 'r1') order by (i, j desc) settings allow_nullable_key = 1, index_granularity = 2, index_granularity_bytes = 10000, allow_experimental_reverse_key = 1; + +set allow_unrestricted_reads_from_keeper = 'true'; + +select value from system.zookeeper where path = '/clickhouse/tables/' || currentDatabase() || '/x1' and name = 'metadata'; +select value from system.zookeeper where path = '/clickhouse/tables/' || currentDatabase() || '/x2' and name = 'metadata'; + +drop table x1; +drop table x2; From c801184c4fa72cc0d9c83b07d8d442de8ae92778 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 29 Oct 2024 10:40:43 +0800 Subject: [PATCH 08/14] Stabilize test --- tests/queries/0_stateless/03257_reverse_sorting_key.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index ab177197446..e3fabbb9620 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -8,6 +8,8 @@ create table x1 (i Nullable(int)) engine MergeTree order by i desc settings allo insert into x1 select * from numbers(100); +optimize table x1 final; + select * from x1 where i = 3; select count() from x1 where i between 3 and 10; @@ -24,6 +26,8 @@ create table x2 (i Nullable(int), j Nullable(int)) engine MergeTree order by (i, insert into x2 select number % 10, number + 1000 from numbers(100); +optimize table x2 final; + select * from x2 where j = 1003; select count() from x2 where i between 3 and 10 and j between 1003 and 1008; From a2ce825491130621e72a2df1b689cee06c053467 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 29 Oct 2024 14:55:15 +0800 Subject: [PATCH 09/14] Add another setting to fix test --- tests/queries/0_stateless/03257_reverse_sorting_key.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index e3fabbb9620..454dace2476 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -1,4 +1,5 @@ set optimize_read_in_order = 1; +set read_in_order_two_level_merge_threshold=100; drop table if exists x1; From f8437fcdac27c95c92d1a0331dffc209fcd21e99 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 31 Oct 2024 17:19:31 +0000 Subject: [PATCH 10/14] Try to add ASTStorageOrderByElement. --- src/Interpreters/MutationsInterpreter.cpp | 2 +- src/Parsers/ASTFunction.cpp | 8 -- src/Parsers/ASTOrderByElement.cpp | 14 ++++ src/Parsers/ASTOrderByElement.h | 19 +++++ src/Parsers/ExpressionElementParsers.cpp | 12 ++- src/Parsers/ExpressionElementParsers.h | 5 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 5 ++ src/Parsers/ParserCreateQuery.cpp | 40 +++++----- src/Parsers/ParserCreateQuery.h | 5 ++ src/Parsers/ParserProjectionSelectQuery.cpp | 2 +- .../Optimizations/optimizeReadInOrder.cpp | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 2 +- src/Storages/KeyDescription.cpp | 76 +++++++++++-------- src/Storages/KeyDescription.h | 9 +-- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 4 +- .../ReplicatedMergeTreeTableMetadata.cpp | 23 +++--- src/Storages/StorageKeeperMap.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- src/Storages/TTLDescription.cpp | 2 +- .../0_stateless/03257_reverse_sorting_key.sql | 8 +- 25 files changed, 163 insertions(+), 91 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 2f8bb8c16ac..50b5143f6b2 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1445,7 +1445,7 @@ std::optional MutationsInterpreter::getStorageSortDescriptionIf { if (header.has(sort_columns[i])) { - if (reverse_flags[i]) + if (!reverse_flags.empty() && reverse_flags[i]) sort_description.emplace_back(sort_columns[i], -1, 1); else sort_description.emplace_back(sort_columns[i], 1, 1); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index ae4ba0cc336..53d44e2f325 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -284,14 +284,6 @@ static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// Internal function for reversing MergeTree sorting keys - if (name == "__descendingKey") - { - arguments->formatImpl(settings, state, frame); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " DESC" << (settings.hilite ? hilite_none : ""); - return; - } - frame.expression_list_prepend_whitespace = false; if (kind == Kind::CODEC || kind == Kind::STATISTICS || kind == Kind::BACKUP_NAME) frame.allow_operators = false; diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 09193a8b5e1..06b1a58a930 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -57,4 +57,18 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState } } +void ASTStorageOrderByElement::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const +{ + hash_state.update(direction); + IAST::updateTreeHashImpl(hash_state, ignore_aliases); +} + +void ASTStorageOrderByElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + children.front()->formatImpl(settings, state, frame); + + if (direction == -1) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " DESC" << (settings.hilite ? hilite_none : ""); +} + } diff --git a/src/Parsers/ASTOrderByElement.h b/src/Parsers/ASTOrderByElement.h index 6edf84d7bde..836c896196c 100644 --- a/src/Parsers/ASTOrderByElement.h +++ b/src/Parsers/ASTOrderByElement.h @@ -82,4 +82,23 @@ private: std::unordered_map positions; }; +class ASTStorageOrderByElement : public IAST +{ +public: + int direction = 1; /// 1 for ASC, -1 for DESC + + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return clone; + } + + String getID(char) const override { return "StorageOrderByElement"; } + void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 7fef956179e..f5c7e76129b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -2174,6 +2174,12 @@ bool ParserStorageOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!elem_p.parse(pos, expr_elem, expected)) return false; + if (!allow_order) + { + node = std::move(expr_elem); + return true; + } + int direction = 1; if (descending.ignore(pos, expected) || desc.ignore(pos, expected)) @@ -2181,7 +2187,11 @@ bool ParserStorageOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & else ascending.ignore(pos, expected) || asc.ignore(pos, expected); - node = direction == 1 ? std::move(expr_elem) : makeASTFunction("__descendingKey", std::move(expr_elem)); + auto storage_elem = std::make_shared(); + storage_elem->children.push_back(std::move(expr_elem)); + storage_elem->direction = direction; + + node = std::move(storage_elem); return true; } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 467813b7aa0..a8e49f7e560 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -436,7 +436,12 @@ protected: */ class ParserStorageOrderByElement : public IParserBase { +public: + explicit ParserStorageOrderByElement(bool allow_order_) : allow_order(allow_order_) {} + protected: + bool allow_order; + const char * getName() const override { return "element of storage ORDER BY expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 5396340bc36..ac2381c3483 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -327,7 +327,7 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected bool ParserStorageOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + return ParserList(std::make_unique(allow_order), std::make_unique(TokenType::Comma), false) .parse(pos, node, expected); } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 0fc5875156d..8e2304f3e47 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -251,7 +251,12 @@ protected: class ParserStorageOrderByExpressionList : public IParserBase { +public: + explicit ParserStorageOrderByExpressionList(bool allow_order_) : allow_order(allow_order_) {} + protected: + bool allow_order; + const char * getName() const override { return "storage order by expression"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 56f661eb46e..5a852fc4017 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace DB @@ -496,8 +497,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E bool ParserStorageOrderByClause::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserStorageOrderByExpressionList order_list_p; - ParserStorageOrderByElement order_elem_p; + ParserStorageOrderByExpressionList order_list_p(allow_order); + ParserStorageOrderByElement order_elem_p(allow_order); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); @@ -506,6 +507,11 @@ bool ParserStorageOrderByClause::parseImpl(Pos & pos, ASTPtr & node, Expected & /// Check possible ASC|DESC suffix for single key if (order_elem_p.parse(pos, order_by, expected)) { + /// This is needed because 'order by (x, y)' is parsed as tuple. + /// We can remove ASTStorageOrderByElement if no ASC|DESC suffix was specified. + if (const auto * elem = order_by->as(); elem && elem->direction > 0) + order_by = elem->children.front(); + node = order_by; return true; } @@ -514,23 +520,21 @@ bool ParserStorageOrderByClause::parseImpl(Pos & pos, ASTPtr & node, Expected & if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "tuple") ++pos; - if (s_lparen.ignore(pos, expected)) - { - auto tuple_function = std::make_shared(); - tuple_function->name = "tuple"; - if (order_list_p.parse(pos, order_by, expected)) - tuple_function->arguments = std::move(order_by); - else - tuple_function->arguments = std::make_shared(); - tuple_function->children.push_back(tuple_function->arguments); - order_by = std::move(tuple_function); - s_rparen.check(pos, expected); + if (!s_lparen.ignore(pos, expected)) + return false; - node = order_by; - return true; - } + if (!order_list_p.parse(pos, order_by, expected)) + order_by = std::make_shared(); - return false; + if (!s_rparen.ignore(pos, expected)) + return false; + + auto tuple_function = std::make_shared(); + tuple_function->name = "tuple"; + tuple_function->arguments = std::move(order_by); + + node = std::move(tuple_function); + return true; } bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -546,7 +550,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; - ParserStorageOrderByClause order_by_p; + ParserStorageOrderByClause order_by_p(/*allow_order_*/ true); ParserSetQuery settings_p(/* parse_only_internals_ = */ true); ParserTTLExpressionList parser_ttl_list; ParserStringLiteral string_literal_parser; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 76e20ca107b..5ab42d72f4a 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -90,7 +90,12 @@ protected: class ParserStorageOrderByClause : public IParserBase { +public: + explicit ParserStorageOrderByClause(bool allow_order_) : allow_order(allow_order_) {} + protected: + bool allow_order; + const char * getName() const override { return "storage order by clause"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ParserProjectionSelectQuery.cpp b/src/Parsers/ParserProjectionSelectQuery.cpp index cd29fcfddb3..39456876b1c 100644 --- a/src/Parsers/ParserProjectionSelectQuery.cpp +++ b/src/Parsers/ParserProjectionSelectQuery.cpp @@ -22,7 +22,7 @@ bool ParserProjectionSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserNotEmptyExpressionList exp_list_for_with_clause(false); ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Allows aliases without AS keyword. - ParserStorageOrderByExpressionList order_list_p; + ParserStorageOrderByExpressionList order_list_p(/*allow_order_*/ false); ASTPtr with_expression_list; ASTPtr select_expression_list; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 25564c55255..cd37e24f068 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -372,7 +372,7 @@ InputOrderInfoPtr buildInputOrderFromSortDescription( while (next_description_column < description.size() && next_sort_key < sorting_key.column_names.size()) { const auto & sorting_key_column = sorting_key.column_names[next_sort_key]; - int reverse_indicator = sorting_key.reverse_flags[next_sort_key] ? -1 : 1; + int reverse_indicator = (!sorting_key.reverse_flags.empty() && sorting_key.reverse_flags[next_sort_key]) ? -1 : 1; const auto & sort_column_description = description[next_description_column]; /// If required order depend on collation, it cannot be matched with primary key order. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 9bc259dd292..145f1327de9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1403,7 +1403,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) { - if (reverse_flags[i]) + if (!reverse_flags.empty() && reverse_flags[i]) sort_description.emplace_back(sort_columns[i], -1, 1); else sort_description.emplace_back(sort_columns[i], 1, 1); diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index d2f3423400f..dc9a4e4ab93 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -411,7 +411,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( context_) , column_names{column_names_} , storage{std::move(storage_)} - , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context).expression} + , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context, false).expression} , max_block_size{max_block_size_} , num_streams{num_streams_} , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index ca00ad8c936..e78b82ea29f 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -24,7 +25,6 @@ namespace ErrorCodes KeyDescription::KeyDescription(const KeyDescription & other) : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) - , original_expression_list_ast(other.original_expression_list_ast ? other.original_expression_list_ast->clone() : nullptr) , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr) , sample_block(other.sample_block) , column_names(other.column_names) @@ -46,11 +46,6 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) else definition_ast.reset(); - if (other.original_expression_list_ast) - original_expression_list_ast = other.original_expression_list_ast->clone(); - else - original_expression_list_ast.reset(); - if (other.expression_list_ast) expression_list_ast = other.expression_list_ast->clone(); else @@ -129,34 +124,38 @@ KeyDescription KeyDescription::getSortingKeyFromAST( { KeyDescription result; result.definition_ast = definition_ast; - result.original_expression_list_ast = extractKeyExpressionList(definition_ast); + auto key_expression_list = extractKeyExpressionList(definition_ast); + + result.expression_list_ast = std::make_shared(); + for (const auto & child : key_expression_list->children) + { + auto real_key = child; + if (auto * elem = child->as()) + { + real_key = elem->children.front(); + result.reverse_flags.emplace_back(elem->direction < 0); + } + + result.expression_list_ast->children.push_back(real_key); + result.column_names.emplace_back(real_key->getColumnName()); + } if (additional_column) { result.additional_column = additional_column; ASTPtr column_identifier = std::make_shared(*additional_column); - result.original_expression_list_ast->children.push_back(column_identifier); + result.column_names.emplace_back(column_identifier->getColumnName()); + result.expression_list_ast->children.push_back(column_identifier); + + if (!result.reverse_flags.empty()) + result.reverse_flags.emplace_back(false); } - result.expression_list_ast = std::make_shared(); - const auto & children = result.original_expression_list_ast->children; - for (const auto & child : children) - { - if (auto * func = child->as()) - { - if (func->name == "__descendingKey") - { - auto & real_key = func->arguments->children.front(); - result.column_names.emplace_back(real_key->getColumnName()); - result.reverse_flags.emplace_back(true); - result.expression_list_ast->children.push_back(real_key->clone()); - continue; - } - } - result.column_names.emplace_back(child->getColumnName()); - result.reverse_flags.emplace_back(false); - result.expression_list_ast->children.push_back(child->clone()); - } + if (!result.reverse_flags.empty() && result.reverse_flags.size() != result.expression_list_ast->children.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "The size of reverse_flags ({}) does not match the size of KeyDescription {}", + result.reverse_flags.size(), result.expression_list_ast->children.size()); { auto expr = result.expression_list_ast->clone(); @@ -179,22 +178,39 @@ KeyDescription KeyDescription::getSortingKeyFromAST( return result; } +ASTPtr KeyDescription::getOriginalExpressionList() const +{ + if (!expression_list_ast || reverse_flags.empty()) + return expression_list_ast; + + auto expr_list = std::make_shared(); + size_t size = expression_list_ast->children.size(); + for (size_t i = 0; i < size; ++i) + { + auto column_ast = std::make_shared(); + column_ast->children.push_back(expression_list_ast->children[i]); + column_ast->direction = (!reverse_flags.empty() && reverse_flags[i]) ? -1 : 1; + expr_list->children.push_back(std::move(column_ast)); + } + + return expr_list; +} + KeyDescription KeyDescription::buildEmptyKey() { KeyDescription result; - result.original_expression_list_ast = std::make_shared(); result.expression_list_ast = std::make_shared(); result.expression = std::make_shared(ActionsDAG(), ExpressionActionsSettings{}); return result; } -KeyDescription KeyDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) +KeyDescription KeyDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context, bool allow_order) { KeyDescription result; if (str.empty()) return result; - ParserStorageOrderByClause parser; + ParserStorageOrderByClause parser(allow_order); ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer::visit(ast.get()); diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index d3f8b3fad2b..a90e8f6e9a4 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -14,10 +14,7 @@ struct KeyDescription /// primary key in merge tree can be part of sorting key) ASTPtr definition_ast; - /// Original user defined ASTExpressionList with key fields, example: (x DESC, toStartOfMonth(date))). - ASTPtr original_expression_list_ast; - - /// Same as above but without special function __descendingKey, a.k.a, ASC|DESC suffix. + /// ASTExpressionList with key fields, example: (x DESC, toStartOfMonth(date))). ASTPtr expression_list_ast; /// Expression from expression_list_ast created by ExpressionAnalyzer. Useful, @@ -73,6 +70,8 @@ struct KeyDescription const ColumnsDescription & columns, ContextPtr context); + ASTPtr getOriginalExpressionList() const; + KeyDescription() = default; /// We need custom copy constructors because we don't want @@ -84,7 +83,7 @@ struct KeyDescription static bool moduloToModuloLegacyRecursive(ASTPtr node_expr); /// Parse description from string - static KeyDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context); + static KeyDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context, bool allow_order); }; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 02365009bcd..fce9e7ec6ca 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1691,7 +1691,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const for (size_t i = 0; i < sort_columns_size; ++i) { - if (reverse_flags[i]) + if (!reverse_flags.empty() && reverse_flags[i]) sort_description.emplace_back(sort_columns[i], -1, 1); else sort_description.emplace_back(sort_columns[i], 1, 1); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a13abb19cca..870f915bd46 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -665,7 +665,7 @@ void MergeTreeData::checkProperties( size_t num_sorting_keys = new_metadata.sorting_key.column_names.size(); for (size_t i = 0; i < num_sorting_keys; ++i) { - if (new_metadata.sorting_key.reverse_flags[i]) + if (!new_metadata.sorting_key.reverse_flags.empty() && new_metadata.sorting_key.reverse_flags[i]) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 4d847e768c6..ff1993cd548 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1091,7 +1091,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( if (i < index->size()) { index_columns->emplace_back(index->at(i), primary_key.data_types[i], primary_key.column_names[i]); - reverse_flags.push_back(primary_key.reverse_flags[i]); + reverse_flags.push_back(!primary_key.reverse_flags.empty() && primary_key.reverse_flags[i]); } else { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 56e868836d8..8c004d1ca48 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -524,7 +524,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( for (size_t i = 0; i < sort_columns_size; ++i) { - if (reverse_flags[i]) + if (!reverse_flags.empty() && reverse_flags[i]) sort_description.emplace_back(sort_columns[i], -1, 1); else sort_description.emplace_back(sort_columns[i], 1, 1); @@ -798,7 +798,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( for (size_t i = 0; i < sort_columns_size; ++i) { - if (reverse_flags[i]) + if (!reverse_flags.empty() && reverse_flags[i]) sort_description.emplace_back(sort_columns[i], -1, 1); else sort_description.emplace_back(sort_columns[i], 1, 1); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 9e44a9479d9..f341ab4816b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -77,23 +77,26 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// So rules in zookeeper metadata is following: /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - - primary_key = formattedASTNormalized(metadata_snapshot->getPrimaryKey().original_expression_list_ast); if (metadata_snapshot->isPrimaryKeyDefined()) { - /// We don't use preparsed AST `sorting_key.original_expression_list_ast` because + primary_key = formattedASTNormalized(metadata_snapshot->getPrimaryKey().expression_list_ast); + /// We don't use preparsed AST `sorting_key.expression_list_ast` because /// it contain version column for VersionedCollapsingMergeTree, which /// is not stored in ZooKeeper for compatibility reasons. So the best /// compatible way is just to convert definition_ast to list and - /// serialize it. In all other places key.original_expression_list_ast should be + /// serialize it. In all other places key.expression_list_ast should be /// used. sorting_key = formattedASTNormalized(extractKeyExpressionList(metadata_snapshot->getSortingKey().definition_ast)); } + else + { + primary_key = formattedASTNormalized(metadata_snapshot->getPrimaryKey().getOriginalExpressionList()); + } data_format_version = data.format_version; if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - partition_key = formattedASTNormalized(metadata_snapshot->getPartitionKey().original_expression_list_ast); + partition_key = formattedASTNormalized(metadata_snapshot->getPartitionKey().expression_list_ast); ttl_table = formattedASTNormalized(metadata_snapshot->getTableTTLs().definition_ast); @@ -301,7 +304,7 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. - String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).original_expression_list_ast); + String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context, true).getOriginalExpressionList()); if (primary_key != parsed_zk_primary_key) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in primary key. " "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", @@ -313,7 +316,7 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.data_format_version.toUnderType()), DB::toString(data_format_version.toUnderType())); - String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).original_expression_list_ast); + String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context, false).expression_list_ast); if (partition_key != parsed_zk_partition_key) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in partition key expression. " @@ -326,7 +329,7 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl checkImmutableFieldsEquals(from_zk, columns, context); - String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast); + String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context, false).definition_ast); if (sampling_expression != parsed_zk_sampling_expression) { throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sample expression. " @@ -334,7 +337,7 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl from_zk.sampling_expression, parsed_zk_sampling_expression, sampling_expression); } - String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast)); + String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context, true).definition_ast)); if (sorting_key != parsed_zk_sorting_key) { throw Exception(ErrorCodes::METADATA_MISMATCH, @@ -343,7 +346,7 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl from_zk.sorting_key, parsed_zk_sorting_key, sorting_key); } - auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context); + auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context, true); String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast); if (ttl_table != parsed_zk_ttl_table) { diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index de950a939ea..316eced1ed6 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -375,7 +375,7 @@ StorageKeeperMap::StorageKeeperMap( WriteBufferFromOwnString out; out << "KeeperMap metadata format version: 1\n" << "columns: " << metadata.columns.toString() - << "primary key: " << formattedAST(metadata.getPrimaryKey().original_expression_list_ast) << "\n"; + << "primary key: " << formattedAST(metadata.getPrimaryKey().expression_list_ast) << "\n"; metadata_string = out.str(); if (zk_root_path.empty()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 406e01aebb3..850623157a1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6254,9 +6254,9 @@ void StorageReplicatedMergeTree::alter( if (ast_to_str(future_metadata.sorting_key.definition_ast) != ast_to_str(current_metadata->sorting_key.definition_ast)) { /// We serialize definition_ast as list, because code which apply ALTER (setTableStructure) expect serialized non empty expression - /// list here and we cannot change this representation for compatibility. Also we have preparsed AST `sorting_key.original_expression_list_ast` + /// list here and we cannot change this representation for compatibility. Also we have preparsed AST `sorting_key.expression_list_ast` /// in KeyDescription, but it contain version column for VersionedCollapsingMergeTree, which shouldn't be defined as a part of key definition AST. - /// So the best compatible way is just to convert definition_ast to list and serialize it. In all other places key.original_expression_list_ast should be used. + /// So the best compatible way is just to convert definition_ast to list and serialize it. In all other places key.expression_list_ast should be used. future_metadata_in_zk.sorting_key = serializeAST(*extractKeyExpressionList(future_metadata.sorting_key.definition_ast)); } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 4845984cc88..ebf3d12f2b6 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -275,7 +275,7 @@ TTLDescription TTLDescription::getTTLFromAST( for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i) { if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i]) - throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key"); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key {} {}", ttl_element->group_by_key[i]->getColumnName(), pk_columns[i]); used_primary_key_columns_set.insert(pk_columns[i]); } diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index 454dace2476..221d6e70927 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -15,11 +15,11 @@ select * from x1 where i = 3; select count() from x1 where i between 3 and 10; -explain pipeline select * from x1 order by i desc limit 5; +explain pipeline select * from x1 order by i desc limit 5 settings max_threads=1; select * from x1 order by i desc limit 5; -explain pipeline select * from x1 order by i limit 5; +explain pipeline select * from x1 order by i limit 5 settings max_threads=1; select * from x1 order by i limit 5; @@ -33,11 +33,11 @@ select * from x2 where j = 1003; select count() from x2 where i between 3 and 10 and j between 1003 and 1008; -explain pipeline select * from x2 order by i, j desc limit 5; +explain pipeline select * from x2 order by i, j desc limit 5 settings max_threads=1; select * from x2 order by i, j desc limit 5; -explain pipeline select * from x2 order by i, j limit 5; +explain pipeline select * from x2 order by i, j limit 5 settings max_threads=1; select * from x2 order by i, j limit 5; From c2bcf151c754c8e50380b17b07055e1ca67c4346 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 4 Nov 2024 13:45:53 +0000 Subject: [PATCH 11/14] Update tests tegs. --- tests/queries/0_stateless/03257_reverse_sorting_key.sql | 2 ++ .../queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index 221d6e70927..6aaeb2bacd6 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-merge-tree-settings + set optimize_read_in_order = 1; set read_in_order_two_level_merge_threshold=100; diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql index aa7a0325f25..fd903d3a4de 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper +-- Tags: zookeeper, no-random-merge-tree-settings drop table if exists x1; drop table if exists x2; From aac75cdb71ea39074b4d12ec161927a19792f766 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 6 Nov 2024 17:26:36 +0100 Subject: [PATCH 12/14] Update 03257_reverse_sorting_key_zookeeper.sql --- .../queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql index fd903d3a4de..108d348fb76 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-random-merge-tree-settings +-- Tags: zookeeper, no-random-merge-tree-settings, no-replicated-database drop table if exists x1; drop table if exists x2; From 6dbe20839afe23f89043bd78048f0c1a6993bb8b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Nov 2024 16:02:07 +0000 Subject: [PATCH 13/14] Fix nulls direction check. --- .../Optimizations/optimizeReadInOrder.cpp | 2 +- .../03257_reverse_sorting_key.reference | 33 +++++++++++-------- .../0_stateless/03257_reverse_sorting_key.sql | 4 +++ 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index a0921359790..50154c25842 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -426,7 +426,7 @@ SortingInputOrder buildInputOrderFromSortDescription( /// Since sorting key columns are always sorted with NULLS LAST, reading in order /// supported only for ASC NULLS LAST ("in order"), and DESC NULLS FIRST ("reverse") const auto column_is_nullable = sorting_key.data_types[next_sort_key]->isNullable(); - if (column_is_nullable && sort_column_description.nulls_direction != 1) + if (column_is_nullable && sort_column_description.nulls_direction != sort_column_description.direction) break; /// Direction for current sort key. diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.reference b/tests/queries/0_stateless/03257_reverse_sorting_key.reference index 5371ee21f44..ef51ac39dea 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.reference +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.reference @@ -1,22 +1,25 @@ 3 8 +Sorting (Sorting for ORDER BY) +Prefix sort description: __table1.i DESC +Result sort description: __table1.i DESC (Expression) ExpressionTransform (Limit) Limit (Sorting) - MergeSortingTransform - LimitsCheckingTransform - PartialSortingTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 99 98 97 96 95 +Sorting (Sorting for ORDER BY) +Prefix sort description: __table1.i ASC +Result sort description: __table1.i ASC (Expression) ExpressionTransform (Limit) @@ -34,22 +37,26 @@ ExpressionTransform 4 3 1003 6 +Sorting (Sorting for ORDER BY) +Prefix sort description: __table1.i ASC, __table1.j DESC +Result sort description: __table1.i ASC, __table1.j DESC (Expression) ExpressionTransform (Limit) Limit (Sorting) - FinishSortingTransform - PartialSortingTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 0 1090 0 1080 0 1070 0 1060 0 1050 +Sorting (Sorting for ORDER BY) +Prefix sort description: __table1.i ASC +Result sort description: __table1.i ASC, __table1.j ASC (Expression) ExpressionTransform (Limit) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index 6aaeb2bacd6..9d2f279026f 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -17,10 +17,12 @@ select * from x1 where i = 3; select count() from x1 where i between 3 and 10; +select trimLeft(explain) from (explain actions=1 select * from x1 order by i desc limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; explain pipeline select * from x1 order by i desc limit 5 settings max_threads=1; select * from x1 order by i desc limit 5; +select trimLeft(explain) from (explain actions=1 select * from x1 order by i limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; explain pipeline select * from x1 order by i limit 5 settings max_threads=1; select * from x1 order by i limit 5; @@ -35,10 +37,12 @@ select * from x2 where j = 1003; select count() from x2 where i between 3 and 10 and j between 1003 and 1008; +select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j desc limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; explain pipeline select * from x2 order by i, j desc limit 5 settings max_threads=1; select * from x2 order by i, j desc limit 5; +select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; explain pipeline select * from x2 order by i, j limit 5 settings max_threads=1; select * from x2 order by i, j limit 5; From 8953babcd0622120a4489de6bd7d9887a2351b38 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Nov 2024 19:49:13 +0100 Subject: [PATCH 14/14] Update 03257_reverse_sorting_key.sql --- tests/queries/0_stateless/03257_reverse_sorting_key.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03257_reverse_sorting_key.sql b/tests/queries/0_stateless/03257_reverse_sorting_key.sql index 9d2f279026f..0777d101ca1 100644 --- a/tests/queries/0_stateless/03257_reverse_sorting_key.sql +++ b/tests/queries/0_stateless/03257_reverse_sorting_key.sql @@ -17,12 +17,12 @@ select * from x1 where i = 3; select count() from x1 where i between 3 and 10; -select trimLeft(explain) from (explain actions=1 select * from x1 order by i desc limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; +select trimLeft(explain) from (explain actions=1 select * from x1 order by i desc limit 5) where explain ilike '%sort%' settings max_threads=1, enable_analyzer=1; explain pipeline select * from x1 order by i desc limit 5 settings max_threads=1; select * from x1 order by i desc limit 5; -select trimLeft(explain) from (explain actions=1 select * from x1 order by i limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; +select trimLeft(explain) from (explain actions=1 select * from x1 order by i limit 5) where explain ilike '%sort%' settings max_threads=1, enable_analyzer=1; explain pipeline select * from x1 order by i limit 5 settings max_threads=1; select * from x1 order by i limit 5; @@ -37,12 +37,12 @@ select * from x2 where j = 1003; select count() from x2 where i between 3 and 10 and j between 1003 and 1008; -select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j desc limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; +select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j desc limit 5) where explain ilike '%sort%' settings max_threads=1, enable_analyzer=1; explain pipeline select * from x2 order by i, j desc limit 5 settings max_threads=1; select * from x2 order by i, j desc limit 5; -select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j limit 5 settings max_threads=1, enable_analyzer=1) where explain ilike '%sort%'; +select trimLeft(explain) from (explain actions=1 select * from x2 order by i, j limit 5) where explain ilike '%sort%' settings max_threads=1, enable_analyzer=1; explain pipeline select * from x2 order by i, j limit 5 settings max_threads=1; select * from x2 order by i, j limit 5;